More doc + summary -> snippet

cstenac · cstenac · commit 96d86725398c · 2018-02-08T18:11:50.000+01:00
diff --git a/dataikuapi/dss/ml.py b/dataikuapi/dss/ml.py
@@ -207,9 +207,9 @@ class DSSClusteringMLTaskSettings(DSSMLTaskSettings):
 
 
 class DSSTrainedModelDetails(object):
-    def __init__(self, details, summary, saved_model=None, saved_model_version=None, mltask=None, mltask_model_id=None):
+    def __init__(self, details, snippet, saved_model=None, saved_model_version=None, mltask=None, mltask_model_id=None):
         self.details = details
-        self.summary = summary
+        self.snippet = snippet
         self.saved_model = saved_model
         self.saved_model_version = saved_model_version
         self.mltask = mltask
@@ -221,6 +221,13 @@ def get_raw(self):
         """
         return self.details
 
+    def get_raw_snippet(self):
+        """
+        Gets the raw dictionary of the trained model snippet.
+        The snippet is a lighter version of the details.
+        """
+        return self.snippet
+
     def get_train_info(self):
         """
         Returns various information about the train process (size of the train set, quick description, timing information)
@@ -257,15 +264,8 @@ class DSSTrainedPredictionModelDetails(DSSTrainedModelDetails):
     Do not create this object directly, use :meth:`DSSMLTask.get_trained_model_details()` instead
     """
 
-    def __init__(self, details, summary, saved_model=None, saved_model_version=None, mltask=None, mltask_model_id=None):
-        DSSTrainedModelDetails.__init__(self, details, summary, saved_model, saved_model_version, mltask, mltask_model_id)
-
-    def get_raw_snippet(self):
-        """
-        Gets the raw dictionary of trained model snippet
-        """
-        return self.summary
-
+    def __init__(self, details, snippet, saved_model=None, saved_model_version=None, mltask=None, mltask_model_id=None):
+        DSSTrainedModelDetails.__init__(self, details, snippet, saved_model, saved_model_version, mltask, mltask_model_id)
 
     def get_roc_curve_data(self):
         roc = self.details.get("perf", {}).get("rocVizData",{})
@@ -299,11 +299,11 @@ def get_performance_metrics(self):
         :rtype: dict
         """
         import copy
-        clean_summary = copy.deepcopy(self.summary)
+        clean_snippet = copy.deepcopy(self.snippet)
         for x in ["gridsearchData", "trainDate", "topImportance", "backendType", "userMeta", "sessionDate", "trainInfo", "fullModelId", "gridLength", "algorithm", "sessionId"]:
-            if x in clean_summary:
-                del clean_summary[x]
-        return clean_summary
+            if x in clean_snippet:
+                del clean_snippet[x]
+        return clean_snippet
 
 
     def get_preprocessing_settings(self):
@@ -372,8 +372,8 @@ class DSSTrainedClusteringModelDetails(DSSTrainedModelDetails):
     Do not create this object directly, use :meth:`DSSMLTask.get_trained_model_details()` instead
     """
 
-    def __init__(self, details, summary, saved_model=None, saved_model_version=None, mltask=None, mltask_model_id=None):
-        DSSTrainedModelDetails.__init__(self, details, summary, saved_model, saved_model_version, mltask, mltask_model_id)
+    def __init__(self, details, snippet, saved_model=None, saved_model_version=None, mltask=None, mltask_model_id=None):
+        DSSTrainedModelDetails.__init__(self, details, snippet, saved_model, saved_model_version, mltask, mltask_model_id)
 
 
     def get_raw(self):
@@ -405,11 +405,11 @@ def get_performance_metrics(self):
         :rtype: dict
         """
         import copy
-        clean_summary = copy.deepcopy(self.summary)
+        clean_snippet = copy.deepcopy(self.snippet)
         for x in ["fullModelId", "algorithm", "trainInfo", "userMeta", "backendType", "sessionId", "sessionDate", "facts"]:
-            if x in clean_summary:
-                del clean_summary[x]
-        return clean_summary
+            if x in clean_snippet:
+                del clean_snippet[x]
+        return clean_snippet
 
     def get_preprocessing_settings(self):
         """
@@ -506,7 +506,7 @@ def get_trained_models_ids(self):
         """
         Gets the list of trained model identifiers for this ML task.
 
-        These identifiers can be used for ``get_trained_model_summary`` and ``deploy_to_flow``
+        These identifiers can be used for ``get_trained_model_snippet`` and ``deploy_to_flow``
 
         :return: A list of model identifiers
         :rtype: list of strings
@@ -515,7 +515,7 @@ def get_trained_models_ids(self):
         return [x["id"] for x in status["fullModelIds"]]
 
 
-    def get_trained_model_summary(self, id):
+    def get_trained_model_snippet(self, id):
         """
         Gets a quick summary of a trained model, as a dict. For complete information and a structured object, use :meth:get_trained_model_details
 
@@ -525,9 +525,8 @@ def get_trained_model_summary(self, id):
             "modelsIds" : [id]
         }
         ret = self.client._perform_json(
-            "POST", "/projects/%s/models/lab/%s/%s/models-summaries" % (self.project_key, self.analysis_id, self.mltask_id),
+            "POST", "/projects/%s/models/lab/%s/%s/models-snippets" % (self.project_key, self.analysis_id, self.mltask_id),
             body = obj)
-        #print ("summaries: %s" % json.dumps(ret, indent=2))
         return ret[id]
 
     def get_trained_model_details(self, id):
@@ -541,13 +540,13 @@ def get_trained_model_details(self, id):
         """
         ret = self.client._perform_json(
             "GET", "/projects/%s/models/lab/%s/%s/models/%s/details" % (self.project_key, self.analysis_id, self.mltask_id,id))
-        summary = self.get_trained_model_summary(id)
+        snippet = self.get_trained_model_snippet(id)
 
 
         if "facts" in ret:
-            return DSSTrainedClusteringModelDetails(ret, summary, mltask=self, mltask_model_id=id)
+            return DSSTrainedClusteringModelDetails(ret, snippet, mltask=self, mltask_model_id=id)
         else:
-            return DSSTrainedPredictionModelDetails(ret, summary, mltask=self, mltask_model_id=id)
+            return DSSTrainedPredictionModelDetails(ret, snippet, mltask=self, mltask_model_id=id)
 
     def deploy_to_flow(self, model_id, model_name, train_dataset, test_dataset=None, redo_optimization=True):
         """
diff --git a/dataikuapi/dss/project.py b/dataikuapi/dss/project.py
@@ -17,8 +17,9 @@
 
 class DSSProject(object):
     """
-    A handle to interact with a project on the DSS instance. Do not create this class directly,
-    instead use ``client.api_client`` where ``client`` is a DSSClient
+    A handle to interact with a project on the DSS instance.
+
+    Do not create this class directly, instead use :meth:`dataikuapi.DSSClient.get_project`
     """
     def __init__(self, client, project_key):
        self.client = client
@@ -48,7 +49,10 @@ def delete(self, drop_data=False):
     def get_export_stream(self, options = {}):
         """
         Return a stream of the exported project
-        You need to close the stream after download. Failure to do so will reuse in the DSSClient becoming unusable.
+        You need to close the stream after download. Failure to do so will result in the DSSClient becoming unusable.
+
+        :returns: a file-like object that is a stream of the export archive
+        :rtype: file-like
         """
         return self.client._perform_raw(
             "POST", "/projects/%s/export" % self.project_key, body=options).raw
@@ -57,7 +61,7 @@ def export_to_file(self, path, options={}):
         """
         Export the project to a file
         
-        :param path: the path of the file in which the exported project should be saved
+        :param str path: the path of the file in which the exported project should be saved
         """
         with open(path, 'wb') as f:
             export_stream = self.client._perform_raw(
@@ -74,22 +78,20 @@ def export_to_file(self, path, options={}):
     def get_metadata(self):
         """
         Get the metadata attached to this project. The metadata contains label, description
-        checklists, tags and custom metadata of the project
+        checklists, tags and custom metadata of the project.
+
+        For more information on available metadata, please see https://doc.dataiku.com/dss/api/latest
         
-        Returns:
-            a dict object. For more information on available metadata, please see
-            https://doc.dataiku.com/dss/api/latest
+        :returns: a dict object containing the project metadata.
+        :rtype: dict
         """
-        return self.client._perform_json(
-            "GET", "/projects/%s/metadata" % self.project_key)
+        return self.client._perform_json("GET", "/projects/%s/metadata" % self.project_key)
 
     def set_metadata(self, metadata):
         """
         Set the metadata on this project.
         
-        Args:
-            metadata: the new state of the metadata for the project. You should only set a metadata object 
-            that has been retrieved using the get_metadata call.
+        :param dict metadata: the new state of the metadata for the project. You should only set a metadata object that has been retrieved using the :meth:`get_metadata` call.
         """
         return self.client._perform_empty(
             "PUT", "/projects/%s/metadata" % self.project_key, body = metadata)
@@ -98,19 +100,16 @@ def get_permissions(self):
        """
        Get the permissions attached to this project
 
-        Returns:
-            a JSON object, containing the owner and the permissions, as a list of pairs of group name
-            and permission type
+        :returns: A dict containing the owner and the permissions, as a list of pairs of group name and permission type
        """
        return self.client._perform_json(
           "GET", "/projects/%s/permissions" % self.project_key)
 
     def set_permissions(self, permissions):
         """
-        Set the permissions on this project
+        Sets the permissions on this project
         
-        Args:
-            permissions: a JSON object of the same structure as the one returned by get_permissions call
+        :param dict permissions: a permissions object with the same structure as the one returned by the :meth:`get_permissions` call
         """
         return self.client._perform_empty(
             "PUT", "/projects/%s/permissions" % self.project_key, body = permissions)
@@ -123,8 +122,8 @@ def list_datasets(self):
         """
         List the datasets in this project
         
-        Returns:
-            the list of the datasets, each one as a JSON object
+        :returns: The list of the datasets, each one as a dictionary. Each dataset dict contains at least a `name` field which is the name of the dataset
+        :rtype: list of dicts
         """
         return self.client._perform_json(
             "GET", "/projects/%s/datasets/" % self.project_key)
@@ -187,6 +186,9 @@ def create_prediction_ml_task(self, input_dataset, target_variable,
         You should wait for the guessing to be completed by calling
         ``wait_guess_complete`` on the returned object before doing anything
         else (in particular calling ``train`` or ``get_settings``)
+
+        :param string ml_backend_type: ML backend to use, one of PY_MEMORY, MLLIB or H2O
+        :param string guess_policy: Policy to use for setting the default parameters.  Valid values are: DEFAULT, SIMPLE_FORMULA, DECISION_TREE, EXPLANATORY and PERFORMANCE
         """
 
         obj = {
@@ -215,6 +217,9 @@ def create_clustering_ml_task(self, input_dataset,
         You should wait for the guessing to be completed by calling
         ``wait_guess_complete`` on the returned object before doing anything
         else (in particular calling ``train`` or ``get_settings``)
+
+        :param string ml_backend_type: ML backend to use, one of PY_MEMORY, MLLIB or H2O
+        :param string guess_policy: Policy to use for setting the default parameters.  Valid values are: KMEANS and ANOMALY_DETECTION
         """
 
         obj = {