Skip to content

Commit e950718

Browse files
committed
Ported from Kiel: Copy the ML API from DSSProject to DSSDataset
1 parent e308c20 commit e950718

File tree

2 files changed

+63
-1
lines changed

2 files changed

+63
-1
lines changed

dataikuapi/dss/dataset.py

Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -336,6 +336,67 @@ def uploaded_list_files(self):
336336
"""
337337
return self.client._perform_json("GET", "/projects/%s/datasets/%s/uploaded/files" % (self.project_key, self.dataset_name))
338338

339+
########################################################
340+
# Lab and ML
341+
# Don't forget to synchronize with DSSProject.*
342+
########################################################
343+
344+
def create_prediction_ml_task(self, target_variable,
                              ml_backend_type="PY_MEMORY",
                              guess_policy="DEFAULT",
                              prediction_type=None,
                              wait_guess_complete=True):
    """Creates a new prediction task in a new visual analysis lab, using this dataset as input.

    This is a thin wrapper: it forwards to ``create_prediction_ml_task`` on the
    owning project, passing this dataset's name as the input dataset.

    :param string target_variable: the variable to predict
    :param string ml_backend_type: ML backend to use, one of PY_MEMORY, MLLIB or H2O
    :param string guess_policy: Policy to use for setting the default parameters. Valid values are: DEFAULT, SIMPLE_FORMULA, DECISION_TREE, EXPLANATORY and PERFORMANCE
    :param string prediction_type: The type of prediction problem this is. If not provided the prediction type will be guessed. Valid values are: BINARY_CLASSIFICATION, REGRESSION, MULTICLASS
    :param boolean wait_guess_complete: if False, the returned ML task will be in 'guessing' state, i.e. analyzing the input dataset to determine feature handling and algorithms.
                                        You should then wait for the guessing to be completed by calling
                                        ``wait_guess_complete`` on the returned object before doing anything
                                        else (in particular calling ``train`` or ``get_settings``)
    :return: the value returned by the project-level ``create_prediction_ml_task`` call
    """
    # Everything except the input dataset is forwarded by keyword, unchanged.
    task_options = {
        "target_variable": target_variable,
        "ml_backend_type": ml_backend_type,
        "guess_policy": guess_policy,
        "prediction_type": prediction_type,
        "wait_guess_complete": wait_guess_complete,
    }
    return self.project.create_prediction_ml_task(self.dataset_name, **task_options)
366+
367+
def create_clustering_ml_task(self, input_dataset=None,
                              ml_backend_type="PY_MEMORY",
                              guess_policy="KMEANS"):
    """Creates a new clustering task in a new visual analysis lab, using this dataset as input.

    This is a thin wrapper: it forwards to ``create_clustering_ml_task`` on the
    owning project, passing this dataset's name as the input dataset.

    The returned ML task will be in 'guessing' state, i.e. analyzing
    the input dataset to determine feature handling and algorithms.

    You should wait for the guessing to be completed by calling
    ``wait_guess_complete`` on the returned object before doing anything
    else (in particular calling ``train`` or ``get_settings``)

    :param input_dataset: ignored. The task is always created on this dataset;
                          the parameter is only kept (now optional) so that existing
                          callers passing it positionally or by keyword keep working.
    :param string ml_backend_type: ML backend to use, one of PY_MEMORY, MLLIB or H2O
    :param string guess_policy: Policy to use for setting the default parameters. Valid values are: KMEANS and ANOMALY_DETECTION
    :return: the value returned by the project-level ``create_clustering_ml_task`` call
    """
    # input_dataset is deliberately unused: this object already identifies the dataset.
    return self.project.create_clustering_ml_task(
        self.dataset_name,
        ml_backend_type=ml_backend_type,
        guess_policy=guess_policy,
    )
386+
387+
def create_analysis(self):
    """
    Creates a new visual analysis lab, using this dataset as input.

    Delegates to ``create_analysis`` on the owning project, passing this
    dataset's name.

    :return: the value returned by the project-level ``create_analysis`` call
    """
    # Must go through self.project: the dataset object itself has no
    # ``project_create_analysis`` attribute.
    return self.project.create_analysis(self.dataset_name)
392+
393+
def list_analyses(self):
    """
    List the visual analyses on this dataset

    Fetches all analyses of the owning project and keeps only those whose
    ``inputDataset`` entry equals this dataset's name.

    :return: the matching analysis descriptions, in the order the project returned them
    :rtype: list of dicts
    """
    matching = []
    for analysis in self.project.list_analyses():
        # Descriptions without an 'inputDataset' entry yield None and are skipped.
        if self.dataset_name == analysis.get('inputDataset'):
            matching.append(analysis)
    return matching
339400

340401
########################################################
341402
# Statistics worksheets

dataikuapi/dss/project.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -345,7 +345,8 @@ def new_managed_dataset_creation_helper(self, dataset_name):
345345
return DSSManagedDatasetCreationHelper(self, dataset_name)
346346

347347
########################################################
348-
# ML
348+
# Lab and ML
349+
# Don't forget to synchronize with DSSDataset.*
349350
########################################################
350351

351352
def create_prediction_ml_task(self, input_dataset, target_variable,

0 commit comments

Comments
 (0)