Skip to content

Commit 2ed0031

Browse files
committed
Merge remote-tracking branch 'origin/release/4.2' into release/4.3
2 parents fa9a3dc + 76f7917 commit 2ed0031

File tree

3 files changed

+35
-22
lines changed

3 files changed

+35
-22
lines changed

HISTORY.txt

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,11 @@
11
Changelog
22
==========
33

4+
4.2.1 (2018-04-30)
5+
-------------------
6+
7+
* Initial release for DSS 4.2
8+
49
4.1.0 (2018-01-10)
510
-------------------
611

dataikuapi/dss/ml.py

Lines changed: 29 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -18,8 +18,8 @@ def set_split_random(self, train_ratio = 0.8, selection = None, dataset_name=Non
1818
Sets the train/test split to random splitting of an extract of a single dataset
1919
2020
:param float train_ratio: Ratio of rows to use for train set. Must be between 0 and 1
21-
:param object selection: A :class:`DSSDatasetSelectionBuilder` to build the settings of the extract of the dataset. May be None (won't be changed)
22-
:param str dataset_name: Name of dataset to split. If None, the main dataset used to create the ML Task will be used.
21+
:param object selection: A :class:`~dataikuapi.dss.utils.DSSDatasetSelectionBuilder` to build the settings of the extract of the dataset. May be None (won't be changed)
22+
:param str dataset_name: Name of dataset to split. If None, the main dataset used to create the visual analysis will be used.
2323
"""
2424
sp = self.mltask_settings["splitParams"]
2525
sp["ttPolicy"] = "SPLIT_SINGLE_DATASET"
@@ -40,8 +40,8 @@ def set_split_kfold(self, n_folds = 5, selection = None, dataset_name=None):
4040
Sets the train/test split to k-fold splitting of an extract of a single dataset
4141
4242
:param int n_folds: number of folds. Must be greater than 0
43-
:param object selection: A :class:`DSSDatasetSelectionBuilder` to build the settings of the extract of the dataset. May be None (won't be changed)
44-
:param str dataset_name: Name of dataset to split. If None, the main dataset used to create the ML Task will be used.
43+
:param object selection: A :class:`~dataikuapi.dss.utils.DSSDatasetSelectionBuilder` to build the settings of the extract of the dataset. May be None (won't be changed)
44+
:param str dataset_name: Name of dataset to split. If None, the main dataset used to create the visual analysis will be used.
4545
"""
4646
sp = self.mltask_settings["splitParams"]
4747
sp["ttPolicy"] = "SPLIT_SINGLE_DATASET"
@@ -59,14 +59,14 @@ def set_split_kfold(self, n_folds = 5, selection = None, dataset_name=None):
5959

6060
def set_split_explicit(self, train_selection, test_selection, dataset_name=None, test_dataset_name=None, train_filter=None, test_filter=None):
6161
"""
62-
Sets the train/test split to explicit extract of one or two dataset
62+
Sets the train/test split to explicit extract of one or two dataset(s)
6363
64-
:param object train_selection: A :class:`DSSDatasetSelectionBuilder` to build the settings of the extract of the train dataset. May be None (won't be changed)
65-
:param object test_selection: A :class:`DSSDatasetSelectionBuilder` to build the settings of the extract of the test dataset. May be None (won't be changed)
64+
:param object train_selection: A :class:`~dataikuapi.dss.utils.DSSDatasetSelectionBuilder` to build the settings of the extract of the train dataset. May be None (won't be changed)
65+
:param object test_selection: A :class:`~dataikuapi.dss.utils.DSSDatasetSelectionBuilder` to build the settings of the extract of the test dataset. May be None (won't be changed)
6666
:param str dataset_name: Name of dataset to use for the extracts. If None, the main dataset used to create the ML Task will be used.
6767
:param str test_dataset_name: Name of a second dataset to use for the test data extract. If None, both extracts are done from dataset_name
68-
:param object train_filter: A :class:`DSSFilterBuilder` to build the settings of the filter of the train dataset. May be None (won't be changed)
69-
:param object test_filter: A :class:`DSSFilterBuilder` to build the settings of the filter of the test dataset. May be None (won't be changed)
68+
:param object train_filter: A :class:`~dataikuapi.dss.utils.DSSFilterBuilder` to build the settings of the filter of the train dataset. May be None (won't be changed)
69+
:param object test_filter: A :class:`~dataikuapi.dss.utils.DSSFilterBuilder` to build the settings of the filter of the test dataset. May be None (won't be changed)
7070
"""
7171
sp = self.mltask_settings["splitParams"]
7272
if dataset_name is None:
@@ -206,8 +206,15 @@ def get_algorithm_settings(self, algorithm_name):
206206
Gets the training settings for a particular algorithm. This returns a reference to the
207207
algorithm's settings, not a copy, so changes made to the returned object will be reflected when saving.
208208
209-
All algorithms have at least an "enabled" setting. Other settings are algorithm-dependent. You can print
210-
the returned object to learn more about the settings of each particular algorithm
209+
This method returns a dictionary of the settings for this algorithm.
210+
All algorithm dicts have at least an "enabled" key in the dictionary.
211+
The 'enabled' key indicates whether this algorithm will be trained
212+
213+
Other settings are algorithm-dependent and are the various hyperparameters of the
214+
algorithm. The precise keys for each algorithm are not all documented. You can print
215+
the returned dictionary to learn more about the settings of each particular algorithm
216+
217+
Please refer to the documentation for details on available algorithms.
211218
212219
:param str algorithm_name: Name (in capitals) of the algorithm.
213220
:return: A dict of the settings for an algorithm
@@ -220,17 +227,19 @@ def get_algorithm_settings(self, algorithm_name):
220227

221228
def set_algorithm_enabled(self, algorithm_name, enabled):
222229
"""
223-
Enables or disables an algorithm.
230+
Enables or disables an algorithm based on its name.
231+
232+
Please refer to the documentation for details on available algorithms.
224233
225234
:param str algorithm_name: Name (in capitals) of the algorithm.
226235
"""
227236
self.get_algorithm_settings(algorithm_name)["enabled"] = enabled
228237

229238
def set_metric(self, metric=None, custom_metric=None, custom_metric_greater_is_better=True, custom_metric_use_probas=False):
230239
"""
231-
Set a metric on a prediction ML task
240+
Sets the score metric to optimize for a prediction ML Task
232241
233-
:param str metric: metric to use. Leave empty for custom_metric
242+
:param str metric: metric to use. Leave empty to use a custom metric. You need to set the ``custom_metric`` value in that case
234243
:param str custom_metric: code of the custom metric
235244
:param bool custom_metric_greater_is_better: whether the custom metric is a score or a loss
236245
:param bool custom_metric_use_probas: whether to use the classes' probas or the predicted value (for classification)
@@ -778,7 +787,7 @@ def start_ensembling(self, model_ids=[], method=None):
778787
:param list model_ids: A list of model identifiers
779788
:param str method: the ensembling method (AVERAGE, PROBA_AVERAGE, MEDIAN, VOTE, LINEAR_MODEL, LOGISTIC_MODEL)
780789
781-
This returns immediately, before train is complete. To wait for train to complete, use ``wait_train_complete()``
790+
This returns immediately, before training is complete. To wait for training to complete, use :meth:`wait_train_complete`
782791
783792
:return: the model identifier of the ensemble
784793
:rtype: string
@@ -794,7 +803,7 @@ def start_ensembling(self, model_ids=[], method=None):
794803

795804
def wait_train_complete(self):
796805
"""
797-
Waits for train to be complete.
806+
Waits for training to be complete (if started with :meth:`start_train`)
798807
"""
799808
while True:
800809
status = self.get_status()
@@ -807,7 +816,7 @@ def get_trained_models_ids(self, session_id=None, algorithm=None):
807816
"""
808817
Gets the list of trained model identifiers for this ML task.
809818
810-
These identifiers can be used for ``get_trained_model_snippet`` and ``deploy_to_flow``
819+
These identifiers can be used for :meth:`get_trained_model_snippet` and :meth:`deploy_to_flow`
811820
812821
:return: A list of model identifiers
813822
:rtype: list of strings
@@ -824,7 +833,7 @@ def get_trained_models_ids(self, session_id=None, algorithm=None):
824833

825834
def get_trained_model_snippet(self, id=None, ids=None):
826835
"""
827-
Gets a quick summary of a trained model, as a dict. For complete information and a structured object, use :meth:get_trained_model_details
836+
Gets a quick summary of a trained model, as a dict. For complete information and a structured object, use :meth:`get_trained_model_details`
828837
829838
:param str id: a model id
830839
:param list ids: a list of model ids
@@ -856,14 +865,13 @@ def get_trained_model_details(self, id):
856865
857866
:param str id: Identifier of the trained model, as returned by :meth:`get_trained_models_ids`
858867
859-
:return: A :class:`DSSTrainedModelDetails` representing the details of this trained model id
860-
:rtype: :class:`DSSTrainedModelDetails`
868+
:return: A :class:`DSSTrainedPredictionModelDetails` or :class:`DSSTrainedClusteringModelDetails` representing the details of this trained model id
869+
:rtype: :class:`DSSTrainedPredictionModelDetails` or :class:`DSSTrainedClusteringModelDetails`
861870
"""
862871
ret = self.client._perform_json(
863872
"GET", "/projects/%s/models/lab/%s/%s/models/%s/details" % (self.project_key, self.analysis_id, self.mltask_id,id))
864873
snippet = self.get_trained_model_snippet(id)
865874

866-
867875
if "facts" in ret:
868876
return DSSTrainedClusteringModelDetails(ret, snippet, mltask=self, mltask_model_id=id)
869877
else:

dataikuapi/dss/utils.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -59,7 +59,7 @@ def with_distinct(self):
5959
return self
6060

6161
def with_formula(self, expression):
62-
"""Sets the filter to deduplicate"""
62+
"""Sets the formula (DSS formula) used to filter rows"""
6363
self.filter["enabled"] = True
6464
self.filter["expression"] = expression
6565
self.filter["uiData"]["mode"] = "CUSTOM"

0 commit comments

Comments
 (0)