Skip to content

Commit 96d8672

Browse files
committed
More doc + summary -> snippet
1 parent 402e4fa commit 96d8672

File tree

2 files changed

+53
-49
lines changed

2 files changed

+53
-49
lines changed

dataikuapi/dss/ml.py

Lines changed: 27 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -207,9 +207,9 @@ class DSSClusteringMLTaskSettings(DSSMLTaskSettings):
207207

208208

209209
class DSSTrainedModelDetails(object):
210-
def __init__(self, details, summary, saved_model=None, saved_model_version=None, mltask=None, mltask_model_id=None):
210+
def __init__(self, details, snippet, saved_model=None, saved_model_version=None, mltask=None, mltask_model_id=None):
211211
self.details = details
212-
self.summary = summary
212+
self.snippet = snippet
213213
self.saved_model = saved_model
214214
self.saved_model_version = saved_model_version
215215
self.mltask = mltask
@@ -221,6 +221,13 @@ def get_raw(self):
221221
"""
222222
return self.details
223223

224+
def get_raw_snippet(self):
225+
"""
226+
Gets the raw dictionary of trained model snippet.
227+
The snippet is a lighter version of the details.
228+
"""
229+
return self.snippet
230+
224231
def get_train_info(self):
225232
"""
226233
Returns various information about the train process (size of the train set, quick description, timing information)
@@ -257,15 +264,8 @@ class DSSTrainedPredictionModelDetails(DSSTrainedModelDetails):
257264
Do not create this object directly, use :meth:`DSSMLTask.get_trained_model_details()` instead
258265
"""
259266

260-
def __init__(self, details, summary, saved_model=None, saved_model_version=None, mltask=None, mltask_model_id=None):
261-
DSSTrainedModelDetails.__init__(self, details, summary, saved_model, saved_model_version, mltask, mltask_model_id)
262-
263-
def get_raw_snippet(self):
264-
"""
265-
Gets the raw dictionary of trained model snippet
266-
"""
267-
return self.summary
268-
267+
def __init__(self, details, snippet, saved_model=None, saved_model_version=None, mltask=None, mltask_model_id=None):
268+
DSSTrainedModelDetails.__init__(self, details, snippet, saved_model, saved_model_version, mltask, mltask_model_id)
269269

270270
def get_roc_curve_data(self):
271271
roc = self.details.get("perf", {}).get("rocVizData",{})
@@ -299,11 +299,11 @@ def get_performance_metrics(self):
299299
:rtype: dict
300300
"""
301301
import copy
302-
clean_summary = copy.deepcopy(self.summary)
302+
clean_snippet = copy.deepcopy(self.snippet)
303303
for x in ["gridsearchData", "trainDate", "topImportance", "backendType", "userMeta", "sessionDate", "trainInfo", "fullModelId", "gridLength", "algorithm", "sessionId"]:
304-
if x in clean_summary:
305-
del clean_summary[x]
306-
return clean_summary
304+
if x in clean_snippet:
305+
del clean_snippet[x]
306+
return clean_snippet
307307

308308

309309
def get_preprocessing_settings(self):
@@ -372,8 +372,8 @@ class DSSTrainedClusteringModelDetails(DSSTrainedModelDetails):
372372
Do not create this object directly, use :meth:`DSSMLTask.get_trained_model_details()` instead
373373
"""
374374

375-
def __init__(self, details, summary, saved_model=None, saved_model_version=None, mltask=None, mltask_model_id=None):
376-
DSSTrainedModelDetails.__init__(self, details, summary, saved_model, saved_model_version, mltask, mltask_model_id)
375+
def __init__(self, details, snippet, saved_model=None, saved_model_version=None, mltask=None, mltask_model_id=None):
376+
DSSTrainedModelDetails.__init__(self, details, snippet, saved_model, saved_model_version, mltask, mltask_model_id)
377377

378378

379379
def get_raw(self):
@@ -405,11 +405,11 @@ def get_performance_metrics(self):
405405
:rtype: dict
406406
"""
407407
import copy
408-
clean_summary = copy.deepcopy(self.summary)
408+
clean_snippet = copy.deepcopy(self.snippet)
409409
for x in ["fullModelId", "algorithm", "trainInfo", "userMeta", "backendType", "sessionId", "sessionDate", "facts"]:
410-
if x in clean_summary:
411-
del clean_summary[x]
412-
return clean_summary
410+
if x in clean_snippet:
411+
del clean_snippet[x]
412+
return clean_snippet
413413

414414
def get_preprocessing_settings(self):
415415
"""
@@ -506,7 +506,7 @@ def get_trained_models_ids(self):
506506
"""
507507
Gets the list of trained model identifiers for this ML task.
508508
509-
These identifiers can be used for ``get_trained_model_summary`` and ``deploy_to_flow``
509+
These identifiers can be used for ``get_trained_model_snippet`` and ``deploy_to_flow``
510510
511511
:return: A list of model identifiers
512512
:rtype: list of strings
@@ -515,7 +515,7 @@ def get_trained_models_ids(self):
515515
return [x["id"] for x in status["fullModelIds"]]
516516

517517

518-
def get_trained_model_summary(self, id):
518+
def get_trained_model_snippet(self, id):
519519
"""
520520
Gets a quick summary of a trained model, as a dict. For complete information and a structured object, use :meth:`get_trained_model_details`
521521
@@ -525,9 +525,8 @@ def get_trained_model_summary(self, id):
525525
"modelsIds" : [id]
526526
}
527527
ret = self.client._perform_json(
528-
"POST", "/projects/%s/models/lab/%s/%s/models-summaries" % (self.project_key, self.analysis_id, self.mltask_id),
528+
"POST", "/projects/%s/models/lab/%s/%s/models-snippets" % (self.project_key, self.analysis_id, self.mltask_id),
529529
body = obj)
530-
#print ("summaries: %s" % json.dumps(ret, indent=2))
531530
return ret[id]
532531

533532
def get_trained_model_details(self, id):
@@ -541,13 +540,13 @@ def get_trained_model_details(self, id):
541540
"""
542541
ret = self.client._perform_json(
543542
"GET", "/projects/%s/models/lab/%s/%s/models/%s/details" % (self.project_key, self.analysis_id, self.mltask_id,id))
544-
summary = self.get_trained_model_summary(id)
543+
snippet = self.get_trained_model_snippet(id)
545544

546545

547546
if "facts" in ret:
548-
return DSSTrainedClusteringModelDetails(ret, summary, mltask=self, mltask_model_id=id)
547+
return DSSTrainedClusteringModelDetails(ret, snippet, mltask=self, mltask_model_id=id)
549548
else:
550-
return DSSTrainedPredictionModelDetails(ret, summary, mltask=self, mltask_model_id=id)
549+
return DSSTrainedPredictionModelDetails(ret, snippet, mltask=self, mltask_model_id=id)
551550

552551
def deploy_to_flow(self, model_id, model_name, train_dataset, test_dataset=None, redo_optimization=True):
553552
"""

dataikuapi/dss/project.py

Lines changed: 26 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -17,8 +17,9 @@
1717

1818
class DSSProject(object):
1919
"""
20-
A handle to interact with a project on the DSS instance. Do not create this class directly,
21-
instead use ``client.api_client`` where ``client`` is a DSSClient
20+
A handle to interact with a project on the DSS instance.
21+
22+
Do not create this class directly, instead use :meth:`dataikuapi.DSSClient.get_project`
2223
"""
2324
def __init__(self, client, project_key):
2425
self.client = client
@@ -48,7 +49,10 @@ def delete(self, drop_data=False):
4849
def get_export_stream(self, options = {}):
4950
"""
5051
Return a stream of the exported project
51-
You need to close the stream after download. Failure to do so will reuse in the DSSClient becoming unusable.
52+
You need to close the stream after download. Failure to do so will result in the DSSClient becoming unusable.
53+
54+
:returns: a file-like object that is a stream of the export archive
55+
:rtype: file-like
5256
"""
5357
return self.client._perform_raw(
5458
"POST", "/projects/%s/export" % self.project_key, body=options).raw
@@ -57,7 +61,7 @@ def export_to_file(self, path, options={}):
5761
"""
5862
Export the project to a file
5963
60-
:param path: the path of the file in which the exported project should be saved
64+
:param str path: the path of the file in which the exported project should be saved
6165
"""
6266
with open(path, 'wb') as f:
6367
export_stream = self.client._perform_raw(
@@ -74,22 +78,20 @@ def export_to_file(self, path, options={}):
7478
def get_metadata(self):
7579
"""
7680
Get the metadata attached to this project. The metadata contains label, description,
77-
checklists, tags and custom metadata of the project
81+
checklists, tags and custom metadata of the project.
82+
83+
For more information on available metadata, please see https://doc.dataiku.com/dss/api/latest
7884
79-
Returns:
80-
a dict object. For more information on available metadata, please see
81-
https://doc.dataiku.com/dss/api/latest
85+
:returns: a dict object containing the project metadata.
86+
:rtype: dict
8287
"""
83-
return self.client._perform_json(
84-
"GET", "/projects/%s/metadata" % self.project_key)
88+
return self.client._perform_json("GET", "/projects/%s/metadata" % self.project_key)
8589

8690
def set_metadata(self, metadata):
8791
"""
8892
Set the metadata on this project.
8993
90-
Args:
91-
metadata: the new state of the metadata for the project. You should only set a metadata object
92-
that has been retrieved using the get_metadata call.
94+
:param dict metadata: the new state of the metadata for the project. You should only set a metadata object that has been retrieved using the :meth:`get_metadata` call.
9395
"""
9496
return self.client._perform_empty(
9597
"PUT", "/projects/%s/metadata" % self.project_key, body = metadata)
@@ -98,19 +100,16 @@ def get_permissions(self):
98100
"""
99101
Get the permissions attached to this project
100102
101-
Returns:
102-
a JSON object, containing the owner and the permissions, as a list of pairs of group name
103-
and permission type
103+
:returns: A dict containing the owner and the permissions, as a list of pairs of group name and permission type
104104
"""
105105
return self.client._perform_json(
106106
"GET", "/projects/%s/permissions" % self.project_key)
107107

108108
def set_permissions(self, permissions):
109109
"""
110-
Set the permissions on this project
110+
Sets the permissions on this project
111111
112-
Args:
113-
permissions: a JSON object of the same structure as the one returned by get_permissions call
112+
:param dict permissions: a permissions object with the same structure as the one returned by :meth:`get_permissions` call
114113
"""
115114
return self.client._perform_empty(
116115
"PUT", "/projects/%s/permissions" % self.project_key, body = permissions)
@@ -123,8 +122,8 @@ def list_datasets(self):
123122
"""
124123
List the datasets in this project
125124
126-
Returns:
127-
the list of the datasets, each one as a JSON object
125+
:returns: The list of the datasets, each one as a dictionary. Each dataset dict contains at least a `name` field which is the name of the dataset
126+
:rtype: list of dicts
128127
"""
129128
return self.client._perform_json(
130129
"GET", "/projects/%s/datasets/" % self.project_key)
@@ -187,6 +186,9 @@ def create_prediction_ml_task(self, input_dataset, target_variable,
187186
You should wait for the guessing to be completed by calling
188187
``wait_guess_complete`` on the returned object before doing anything
189188
else (in particular calling ``train`` or ``get_settings``)
189+
190+
:param string ml_backend_type: ML backend to use, one of PY_MEMORY, MLLIB or H2O
191+
:param string guess_policy: Policy to use for setting the default parameters. Valid values are: DEFAULT, SIMPLE_FORMULA, DECISION_TREE, EXPLANATORY and PERFORMANCE
190192
"""
191193

192194
obj = {
@@ -215,6 +217,9 @@ def create_clustering_ml_task(self, input_dataset,
215217
You should wait for the guessing to be completed by calling
216218
``wait_guess_complete`` on the returned object before doing anything
217219
else (in particular calling ``train`` or ``get_settings``)
220+
221+
:param string ml_backend_type: ML backend to use, one of PY_MEMORY, MLLIB or H2O
222+
:param string guess_policy: Policy to use for setting the default parameters. Valid values are: KMEANS and ANOMALY_DETECTION
218223
"""
219224

220225
obj = {

0 commit comments

Comments
 (0)