Add an optional parameter to force the prediction type when creating an ML prediction task, and add a method to re-guess the task parameters with an option to force the prediction type

Louis Pouillot · Louis Pouillot · commit d6953517c469 · 2018-10-30T10:52:41.000+01:00
diff --git a/dataikuapi/dss/ml.py b/dataikuapi/dss/ml.py
@@ -78,7 +78,7 @@ def set_split_explicit(self, train_selection, test_selection, dataset_name=None,
             sp['efsdDatasetSmartName'] = dataset_name
             sp['efsdTrain'] = train_split
             sp['efsdTest'] = test_split
-        else:            
+        else:
             sp["ttPolicy"] = "EXPLICIT_FILTERING_TWO_DATASETS"
             train_split ={'datasetSmartName' : dataset_name}
             test_split = {'datasetSmartName' : test_dataset_name}
@@ -373,7 +373,7 @@ def get_split_info(self):
         info['nSamples'] = nSamples[self.i] if nSamples is not None else None
         info['threshold'] = thresholds[self.i] if thresholds is not None else None
         return info
- 
+
 class DSSTree(object):
     def __init__(self, tree, feature_names):
         self.tree = tree
@@ -677,7 +677,7 @@ def delete(self):
         """
         return self.client._perform_json(
                 "DELETE", "/projects/%s/models/lab/%s/%s/" % (self.project_key, self.analysis_id, self.mltask_id))
-                
+
 
     def wait_guess_complete(self):
         """
@@ -700,7 +700,7 @@ def get_status(self):
         """
         return self.client._perform_json(
                 "GET", "/projects/%s/models/lab/%s/%s/status" % (self.project_key, self.analysis_id, self.mltask_id))
-                
+
 
     def get_settings(self):
         """
@@ -921,3 +921,25 @@ def redeploy_to_flow(self, model_id, recipe_name=None, saved_model_id=None, acti
             "POST", "/projects/%s/models/lab/%s/%s/models/%s/actions/redeployToFlow" % (self.project_key, self.analysis_id, self.mltask_id, model_id),
             body = obj)
 
+    def start_guess(self,
+                    prediction_type=None,
+                    wait_guess_complete=True):
+        """
+        Guess again the feature handling and the algorithms of this ML task.
+        :param string prediction_type: In case of a prediction problem, the prediction type can be specified. Valid values are BINARY_CLASSIFICATION, REGRESSION, MULTICLASS.
+        :param boolean wait_guess_complete: if True (the default), ``wait_guess_complete`` is called on this ML task
+                                            before returning. If False, this ML task will be in 'guessing' state:
+                                            you should call ``wait_guess_complete`` on it before doing anything
+                                            else (in particular calling ``train`` or ``get_settings``)
+        :return: None
+        """
+        obj = {}  # stays empty unless a prediction type is forced
+        if prediction_type is not None:
+            obj["predictionType"] = prediction_type
+
+        self.client._perform_empty("PUT",
+                                   "/projects/%s/models/lab/%s/%s/guess" % (self.project_key, self.analysis_id, self.mltask_id),
+                                   params=obj)
+
+        if wait_guess_complete:
+            self.wait_guess_complete()
diff --git a/dataikuapi/dss/project.py b/dataikuapi/dss/project.py
@@ -190,30 +190,38 @@ def create_dataset(self, dataset_name, type,
     ########################################################
 
     def create_prediction_ml_task(self, input_dataset, target_variable,
-                                  ml_backend_type = "PY_MEMORY",
-                                  guess_policy = "DEFAULT",
+                                  ml_backend_type="PY_MEMORY",
+                                  guess_policy="DEFAULT",
+                                  prediction_type=None,
                                   wait_guess_complete=True):
 
         """Creates a new prediction task in a new visual analysis lab
         for a dataset.
 
+        :param string input_dataset: the dataset to use for training/testing the model
+        :param string target_variable: the variable to predict
         :param string ml_backend_type: ML backend to use, one of PY_MEMORY, MLLIB or H2O
         :param string guess_policy: Policy to use for setting the default parameters.  Valid values are: DEFAULT, SIMPLE_FORMULA, DECISION_TREE, EXPLANATORY and PERFORMANCE
+        :param string prediction_type: The type of prediction problem. If not provided, the prediction type will be guessed. Valid values are: BINARY_CLASSIFICATION, REGRESSION, MULTICLASS
         :param boolean wait_guess_complete: if False, the returned ML task will be in 'guessing' state, i.e. analyzing the input dataset to determine feature handling and algorithms.
                                             You should wait for the guessing to be completed by calling
                                             ``wait_guess_complete`` on the returned object before doing anything
                                             else (in particular calling ``train`` or ``get_settings``)
         """
         obj = {
-            "inputDataset" : input_dataset,
-            "taskType" : "PREDICTION",
-            "targetVariable" : target_variable,
+            "inputDataset": input_dataset,
+            "taskType": "PREDICTION",
+            "targetVariable": target_variable,
             "backendType": ml_backend_type,
             "guessPolicy":  guess_policy
         }
 
+        if prediction_type is not None:
+            obj["predictionType"] = prediction_type
+
         ref = self.client._perform_json("POST", "/projects/%s/models/lab/" % self.project_key, body=obj)
         ret = DSSMLTask(self.client, self.project_key, ref["analysisId"], ref["mlTaskId"])
+
         if wait_guess_complete:
             ret.wait_guess_complete()
         return ret