Merge PR #32 predictionType option creating MLTask

instanceofme · instanceofme · commit d2ac28073b27 · 2018-11-07T18:03:10.000+01:00
from feature/dss50-create-ml-task-with-prediction-type
diff --git a/dataikuapi/dss/analysis.py b/dataikuapi/dss/analysis.py
@@ -152,24 +152,24 @@ def set_definition(self, definition):
     # ML
     ########################################################
 
-    def create_prediction_ml_task(self, target_variable,
-                                   ml_backend_type = "PY_MEMORY",
-                                   guess_policy = "DEFAULT"):
-
-
+    def create_prediction_ml_task(self,
+                                  target_variable,
+                                  ml_backend_type="PY_MEMORY",
+                                  guess_policy="DEFAULT",
+                                  prediction_type=None,
+                                  wait_guess_complete=True):
         """Creates a new prediction task in this visual analysis lab
         for a dataset.
 
-
-        The returned ML task will be in 'guessing' state, i.e. analyzing
-        the input dataset to determine feature handling and algorithms.
-
-        You should wait for the guessing to be completed by calling
-        ``wait_guess_complete`` on the returned object before doing anything
-        else (in particular calling ``train`` or ``get_settings``)
-
+        :param string target_variable: Variable to predict
         :param string ml_backend_type: ML backend to use, one of PY_MEMORY, MLLIB or H2O
         :param string guess_policy: Policy to use for setting the default parameters.  Valid values are: DEFAULT, SIMPLE_FORMULA, DECISION_TREE, EXPLANATORY and PERFORMANCE
+        :param string prediction_type: The type of prediction problem this is. If not provided the prediction type will be guessed. Valid values are: BINARY_CLASSIFICATION, REGRESSION, MULTICLASS
+        :param boolean wait_guess_complete: if False, the returned ML task will be in 'guessing' state, i.e. analyzing the input dataset to determine feature handling and algorithms.
+                                            You should wait for the guessing to be completed by calling
+                                            ``wait_guess_complete`` on the returned object before doing anything
+                                            else (in particular calling ``train`` or ``get_settings``)
+        :return: the newly created ML task, as a :class:`dataikuapi.dss.ml.DSSMLTask`
         """
 
         obj = {
@@ -178,9 +178,14 @@ def create_prediction_ml_task(self, target_variable,
             "backendType": ml_backend_type,
             "guessPolicy":  guess_policy
         }
-
+        if prediction_type is not None:
+            obj["predictionType"] = prediction_type
         ref = self.client._perform_json("POST", "/projects/%s/lab/%s/models/" % (self.project_key, self.analysis_id), body=obj)
-        return DSSMLTask(self.client, self.project_key, self.analysis_id, ref["mlTaskId"])
+        mltask = DSSMLTask(self.client, self.project_key, self.analysis_id, ref["mlTaskId"])
+
+        if wait_guess_complete:
+            mltask.wait_guess_complete()
+        return mltask
 
     def create_clustering_ml_task(self,
                                    ml_backend_type = "PY_MEMORY",
diff --git a/dataikuapi/dss/ml.py b/dataikuapi/dss/ml.py
@@ -78,7 +78,7 @@ def set_split_explicit(self, train_selection, test_selection, dataset_name=None,
             sp['efsdDatasetSmartName'] = dataset_name
             sp['efsdTrain'] = train_split
             sp['efsdTest'] = test_split
-        else:            
+        else:
             sp["ttPolicy"] = "EXPLICIT_FILTERING_TWO_DATASETS"
             train_split ={'datasetSmartName' : dataset_name}
             test_split = {'datasetSmartName' : test_dataset_name}
@@ -373,7 +373,7 @@ def get_split_info(self):
         info['nSamples'] = nSamples[self.i] if nSamples is not None else None
         info['threshold'] = thresholds[self.i] if thresholds is not None else None
         return info
- 
+
 class DSSTree(object):
     def __init__(self, tree, feature_names):
         self.tree = tree
@@ -677,7 +677,7 @@ def delete(self):
         """
         return self.client._perform_json(
                 "DELETE", "/projects/%s/models/lab/%s/%s/" % (self.project_key, self.analysis_id, self.mltask_id))
-                
+
 
     def wait_guess_complete(self):
         """
@@ -700,7 +700,7 @@ def get_status(self):
         """
         return self.client._perform_json(
                 "GET", "/projects/%s/models/lab/%s/%s/status" % (self.project_key, self.analysis_id, self.mltask_id))
-                
+
 
     def get_settings(self):
         """
@@ -921,3 +921,17 @@ def redeploy_to_flow(self, model_id, recipe_name=None, saved_model_id=None, acti
             "POST", "/projects/%s/models/lab/%s/%s/models/%s/actions/redeployToFlow" % (self.project_key, self.analysis_id, self.mltask_id, model_id),
             body = obj)
 
+    def guess(self, prediction_type=None):
+        """
+        Guess the feature handling and the algorithms.
+        :param string prediction_type: For a prediction problem, the prediction type can be specified. Valid values are BINARY_CLASSIFICATION, REGRESSION, MULTICLASS.
+        """
+        obj = {}
+        if prediction_type is not None:
+            obj["predictionType"] = prediction_type
+
+        self.client._perform_empty(
+            "PUT",
+            "/projects/%s/models/lab/%s/%s/guess" % (self.project_key, self.analysis_id, self.mltask_id),
+            params = obj)
+
diff --git a/dataikuapi/dss/project.py b/dataikuapi/dss/project.py
@@ -190,30 +190,38 @@ def create_dataset(self, dataset_name, type,
     ########################################################
 
     def create_prediction_ml_task(self, input_dataset, target_variable,
-                                  ml_backend_type = "PY_MEMORY",
-                                  guess_policy = "DEFAULT",
+                                  ml_backend_type="PY_MEMORY",
+                                  guess_policy="DEFAULT",
+                                  prediction_type=None,
                                   wait_guess_complete=True):
 
         """Creates a new prediction task in a new visual analysis lab
         for a dataset.
 
+        :param string input_dataset: the dataset to use for training/testing the model
+        :param string target_variable: the variable to predict
         :param string ml_backend_type: ML backend to use, one of PY_MEMORY, MLLIB or H2O
         :param string guess_policy: Policy to use for setting the default parameters.  Valid values are: DEFAULT, SIMPLE_FORMULA, DECISION_TREE, EXPLANATORY and PERFORMANCE
+        :param string prediction_type: The type of prediction problem this is. If not provided the prediction type will be guessed. Valid values are: BINARY_CLASSIFICATION, REGRESSION, MULTICLASS
         :param boolean wait_guess_complete: if False, the returned ML task will be in 'guessing' state, i.e. analyzing the input dataset to determine feature handling and algorithms.
                                             You should wait for the guessing to be completed by calling
                                             ``wait_guess_complete`` on the returned object before doing anything
                                             else (in particular calling ``train`` or ``get_settings``)
         """
         obj = {
-            "inputDataset" : input_dataset,
-            "taskType" : "PREDICTION",
-            "targetVariable" : target_variable,
+            "inputDataset": input_dataset,
+            "taskType": "PREDICTION",
+            "targetVariable": target_variable,
             "backendType": ml_backend_type,
             "guessPolicy":  guess_policy
         }
 
+        if prediction_type is not None:
+            obj["predictionType"] = prediction_type
+
         ref = self.client._perform_json("POST", "/projects/%s/models/lab/" % self.project_key, body=obj)
         ret = DSSMLTask(self.client, self.project_key, ref["analysisId"], ref["mlTaskId"])
+
         if wait_guess_complete:
             ret.wait_guess_complete()
         return ret