Skip to content

Commit d2d5ca4

Browse files
committed
Merge remote-tracking branch 'origin/master' into feature/new-dataset-management-apis-patch + cleanup
2 parents 6b0f762 + 80cae22 commit d2d5ca4

File tree

6 files changed

+359
-145
lines changed

6 files changed

+359
-145
lines changed

dataikuapi/dss/dataset.py

Lines changed: 25 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
from ..utils import DataikuStreamedHttpUTF8CSVReader
44
from .future import DSSFuture
55
import json, warnings
6-
from .utils import DSSTaggableObjectListItem
6+
from .utils import DSSTaggableObjectListItem, DSSTaggableObjectSettings
77
from .future import DSSFuture
88
from .metrics import ComputedMetrics
99
from .discussion import DSSObjectDiscussions
@@ -402,13 +402,31 @@ def create_analysis(self):
402402
"""
403403
return self.project_create_analysis(self.dataset_name)
404404

405-
def list_analyses(self):
405+
def list_analyses(self, as_type="listitems"):
406406
"""
407407
List the visual analyses on this dataset
408-
:return list of dicts
408+
:param str as_type: How to return the list. Supported values are "listitems" and "objects".
409+
:returns: The list of the analyses. If "as_type" is "listitems", each one as a dict.
410+
If "as_type" is "objects", each one as a :class:`dataikuapi.dss.analysis.DSSAnalysis`
411+
:rtype: list
409412
"""
410-
analysis_list = self.project.list_analyses()
411-
return [desc for desc in analysis_list if self.dataset_name == desc.get('inputDataset')]
413+
analysis_list = [al for al in self.project.list_analyses() if self.dataset_name == al.get('inputDataset')]
414+
415+
if as_type == "listitems" or as_type == "listitem":
416+
return analysis_list
417+
elif as_type == "objects" or as_type == "object":
418+
return [self.project.get_analysis(item["analysisId"])for item in analysis_list]
419+
else:
420+
raise ValueError("Unknown as_type")
421+
422+
def delete_analyses(self, drop_data=False):
423+
"""
424+
Deletes all analyses that have this dataset as input dataset. Also deletes
425+
the ML tasks that are part of those analyses
426+
427+
:param bool drop_data: whether to drop data for all ML tasks in the deleted analyses
428+
"""
429+
[analysis.delete(drop_data=drop_data) for analysis in self.list_analyses(as_type="objects")]
412430

413431
########################################################
414432
# Statistics worksheets
@@ -607,36 +625,9 @@ def new_recipe(self, type, recipe_name=None):
607625
builder.with_input(self.dataset_name)
608626
return builder
609627

610-
########################################################
611-
# Creation of analyses
612-
########################################################
613-
614-
def new_analysis(self):
615-
analysis = self.project.create_analysis(self.name)
616-
return analysis
617-
618-
def list_analyses(self):
619-
"""Returns a list of json short description description of analysis that has this dataset as inputDataset
620-
621-
:return: list of dict with keys {'analysisId', 'analysisName', 'inputDataset'}
622-
623-
"""
624-
project_analysis_desc_list = self.project.list_analyses()
625-
return [desc for desc in project_analysis_desc_list if self.name == desc.get('inputDataset')]
626-
627-
def delete_analyses(self, drop_data=False):
628-
"""Deletes all analyses that have this dataset as inputDataset
629-
630-
:param: bool drop_data: will drop analysis data if True. Default is False
631-
"""
632-
633-
desc_list = self.list_analyses()
634-
dss_analysis_list = [self.project.get_analysis(desc['analysisId']) for desc in desc_list]
635-
return [analysis.delete(drop_data=drop_data) for analysis in dss_analysis_list]
636-
637-
638-
class DSSDatasetSettings(object):
628+
class DSSDatasetSettings(DSSTaggableObjectSettings):
639629
def __init__(self, dataset, settings):
630+
super(DSSDatasetSettings, self).__init__(settings)
640631
self.dataset = dataset
641632
self.settings = settings
642633

dataikuapi/dss/flow.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,8 @@ def replace_input_computable(self, current_ref, new_ref, type="DATASET"):
4141
settings.save()
4242

4343
for recipe in self.project.list_recipes():
44-
fake_rap = DSSRecipeDefinitionAndPayload({"recipe" : recipe})
44+
recipe_handle = self.project.get_recipe(recipe["name"])
45+
fake_rap = DSSRecipeDefinitionAndPayload(recipe_handle, {"recipe" : recipe})
4546
if fake_rap.has_input(current_ref):
4647
logging.info("Recipe %s has %s as input, performing the replacement by %s"% \
4748
(recipe["name"], current_ref, new_ref))

dataikuapi/dss/ml.py

Lines changed: 15 additions & 57 deletions
Original file line numberDiff line numberDiff line change
@@ -125,13 +125,13 @@ def set_split_explicit(self, train_selection, test_selection, dataset_name=None,
125125

126126
return self
127127

128-
def set_order_by(self, feature_name, ascending=True):
128+
def set_time_ordering(self, feature_name, ascending=True):
129129
"""
130-
Uses a variable to sort the data for train/test split and hyperparameter optimization
130+
Uses a variable to sort the data by time for train/test split and hyperparameter optimization
131131
:param str feature_name: Name of the variable to use
132132
:param bool ascending: True iff the test set is expected to have larger time values than the train set
133133
"""
134-
self.unset_order_by()
134+
self.unset_time_ordering()
135135
if not feature_name in self.mltask_settings["preprocessing"]["per_feature"]:
136136
raise ValueError("Feature %s doesn't exist in this ML task, can't use as time" % feature_name)
137137
self.mltask_settings['time']['enabled'] = True
@@ -148,9 +148,9 @@ def set_order_by(self, feature_name, ascending=True):
148148

149149
return self
150150

151-
def unset_order_by(self):
151+
def unset_time_ordering(self):
152152
"""
153-
Remove time-based ordering.
153+
Remove time-based ordering for train/test split and hyperparameter optimization
154154
"""
155155
self.mltask_settings['time']['enabled'] = False
156156
self.mltask_settings['time']['timeVariable'] = None
@@ -187,28 +187,6 @@ def get_raw(self):
187187
"""
188188
return self.mltask_settings
189189

190-
def get_split_params(self):
191-
"""
192-
Gets a handle to modify train/test splitting params.
193-
194-
:rtype: :class:`PredictionSplitParamsHandler`
195-
"""
196-
raise NotImplementedError("get_split_params not available for class {}".format(self.__class__))
197-
198-
def split_ordered_by(self, feature_name, ascending=True):
199-
"""
200-
Uses a variable to sort the data for train/test split and hyperparameter optimization
201-
:param str feature_name: Name of the variable to use
202-
:param bool ascending: True iff the test set is expected to have larger time values than the train set
203-
"""
204-
raise NotImplementedError("split_ordered_by not available for class {}".format(self.__class__))
205-
206-
def remove_ordered_split(self):
207-
"""
208-
Remove time-based ordering.
209-
"""
210-
raise NotImplementedError("remove_ordered_split not available for class {}".format(self.__class__))
211-
212190
def get_feature_preprocessing(self, feature_name):
213191
"""
214192
Gets the feature preprocessing params for a particular feature. This returns a reference to the
@@ -249,18 +227,6 @@ def use_feature(self, feature_name):
249227
"""
250228
self.get_feature_preprocessing(feature_name)["role"] = "INPUT"
251229

252-
def use_sample_weighting(self, feature_name):
253-
"""
254-
Deprecated. Will be removed from DSSMLTaskSettings class
255-
"""
256-
raise NotImplementedError("use_sample_weighting() not available for class {}".format(self.__class__))
257-
258-
def remove_sample_weighting(self):
259-
"""
260-
Deprecated. Will be removed from DSSMLTaskSettings class
261-
"""
262-
raise NotImplementedError("remove_sample_weighting() not available for class {}".format(self.__class__))
263-
264230
def get_algorithm_settings(self, algorithm_name):
265231
"""
266232
Gets the training settings for a particular algorithm. This returns a reference to the
@@ -419,31 +385,21 @@ def get_split_params(self):
419385
"""
420386
return PredictionSplitParamsHandler(self.mltask_settings)
421387

422-
@split_params.setter
423-
def split_params(self, value):
424-
raise AttributeError("split_params reference cannot be overwritten, get a handle and modify it with a set method instead")
425-
426388
def split_ordered_by(self, feature_name, ascending=True):
427389
"""
428-
Uses a variable to sort the data for train/test split and hyperparameter optimization
429-
:param str feature_name: Name of the variable to use
430-
:param bool ascending: True iff the test set is expected to have larger time values than the train set
431-
432-
:rtype: self
390+
Deprecated. Use split_params.set_time_ordering()
433391
"""
434-
warnings.warn("split_ordered_by() is deprecated, please use split_params.set_order_by() instead", DeprecationWarning)
435-
self.split_params.set_order_by(feature_name, ascending=True)
392+
warnings.warn("split_ordered_by() is deprecated, please use split_params.set_time_ordering() instead", DeprecationWarning)
393+
self.split_params.set_time_ordering(feature_name, ascending=ascending)
436394

437395
return self
438396

439397
def remove_ordered_split(self):
440398
"""
441-
Remove time-based ordering.
442-
443-
:rtype: self
399+
Deprecated. Use split_params.unset_time_ordering()
444400
"""
445-
warnings.warn("remove_ordered_split() is deprecated, please use split_params.unset_order_by() instead", DeprecationWarning)
446-
self.split_params.unset_order_by()
401+
warnings.warn("remove_ordered_split() is deprecated, please use split_params.unset_time_ordering() instead", DeprecationWarning)
402+
self.split_params.unset_time_ordering()
447403

448404
return self
449405

@@ -456,8 +412,10 @@ def use_sample_weighting(self, feature_name):
456412

457413
def set_weighting(self, method, feature_name=None):
458414
"""
459-
Uses a feature as sample weight
460-
:param str feature_name: Name of the feature to use
415+
Sets the method to weight samples.
416+
:param str method: Method to use. One of NO_WEIGHTING, SAMPLE_WEIGHT (must give a feature name),
417+
CLASS_WEIGHT or CLASS_AND_SAMPLE_WEIGHT (must give a feature name)
418+
:param str feature_name: Name of the feature to use as sample weight
461419
"""
462420
self.unset_weighting()
463421

0 commit comments

Comments
 (0)