
Commit ef6e320

Make recipe name optional and add output dataset overwriting also for visual recipes
1 parent: b2fada3

File tree

3 files changed (+27, -9 lines):

  dataikuapi/dss/dataset.py
  dataikuapi/dss/project.py
  dataikuapi/dss/recipe.py

dataikuapi/dss/dataset.py

Lines changed: 14 additions & 5 deletions

@@ -132,6 +132,14 @@ def set_definition(self, definition):
                 "PUT", "/projects/%s/datasets/%s" % (self.project_key, self.dataset_name),
                 body=definition)
 
+    def exists(self):
+        """Returns whether this dataset exists"""
+        try:
+            self.get_metadata()
+            return True
+        except Exception as e:
+            return False
+
     ########################################################
     # Dataset metadata
     ########################################################
@@ -571,9 +579,6 @@ def get_as_core_dataset(self):
     def new_code_recipe(self, type, code=None, recipe_name=None):
         """Starts creation of a new code recipe taking this dataset as input"""
 
-        if recipe_name is None:
-            recipe_name = "%s_recipe_from_%s" % (type, self.dataset_name)
-
         if type == "python":
             builder = recipe.PythonRecipeCreator(recipe_name, self.project)
         else:
@@ -584,13 +589,17 @@ def new_code_recipe(self, type, code=None, recipe_name=None):
         return builder
 
     def new_grouping_recipe(self, first_group_by, recipe_name=None):
-        if recipe_name is None:
-            recipe_name = "group_%s" % (self.dataset_name)
         builder = recipe.GroupingRecipeCreator(recipe_name, self.project)
         builder.with_input(self.dataset_name)
         builder.with_group_key(first_group_by)
         return builder
 
+    def new_recipe(self, type, recipe_name=None):
+        """Starts creation of a new recipe taking this dataset as input"""
+        builder = self.project.new_recipe(type, recipe_name)
+        builder.with_input(self.dataset_name)
+        return builder
+
 class DSSDatasetSettings(object):
     def __init__(self, dataset, settings):
         self.dataset = dataset
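For context, a minimal usage sketch of the new DSSDataset helpers. The host, API key, project key, dataset name and recipe type below are illustrative assumptions, not values from this commit:

    import dataikuapi

    # Placeholder connection details -- replace with a real DSS host and API key.
    client = dataikuapi.DSSClient("http://localhost:11200", "YOUR_API_KEY")
    project = client.get_project("MYPROJECT")
    dataset = project.get_dataset("mydataset")

    # New helper: exists() reports whether the dataset is actually defined in the project.
    if dataset.exists():
        # New helper: new_recipe() starts any recipe type from this dataset;
        # the recipe name is now optional and can be set later on the creator.
        builder = dataset.new_recipe("grouping")
        builder.set_name("group_mydataset")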

dataikuapi/dss/project.py

Lines changed: 1 addition & 1 deletion

@@ -867,7 +867,7 @@ def create_recipe(self, recipe_proto, creation_settings):
                 body = definition)['name']
         return DSSRecipe(self.client, self.project_key, recipe_name)
 
-    def new_recipe(self, type, name):
+    def new_recipe(self, type, name=None):
         """
         Initializes the creation of a new recipe. Returns a :class:`dataikuapi.dss.recipe.DSSRecipeCreator`
         or one of its subclasses to complete the creation of the recipe.
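A short sketch of the relaxed signature, reusing the placeholder client and project from the sketch above; the recipe type, name and input dataset are again assumptions:

    # The recipe name can now be omitted when starting the creator...
    creator = project.new_recipe("sync")
    # ...and supplied afterwards via the set_name() helper added in recipe.py.
    creator.set_name("sync_mydataset")
    creator.with_input("mydataset")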

dataikuapi/dss/recipe.py

Lines changed: 12 additions & 3 deletions

@@ -448,6 +448,9 @@ def __init__(self, type, name, project):
         self.creation_settings = {
         }
 
+    def set_name(self, name):
+        self.recipe_proto["name"] = name
+
     def _build_ref(self, object_id, project_key=None):
         if project_key is not None and project_key != self.project.project_key:
             return project_key + '.' + object_id
@@ -547,10 +550,10 @@ def with_existing_output(self, dataset_name, append=False):
         self._with_output(dataset_name, append)
         return self
 
-    def with_new_output(self, name, connection_id, typeOptionId=None, format_option_id=None, override_sql_schema=None, partitioning_option_id=None, append=False, object_type='DATASET'):
+    def with_new_output(self, name, connection_id, typeOptionId=None, format_option_id=None, override_sql_schema=None, partitioning_option_id=None, append=False, object_type='DATASET', overwrite=False):
         """
         Create a new dataset as output to the recipe-to-be-created. The dataset is not created immediately,
-        but when the recipe is created (ie in the build() method)
+        but when the recipe is created (ie in the create() method)
 
         :param str name: name of the dataset or identifier of the managed folder
         :param str connection_id: name of the connection to create the dataset on
@@ -565,9 +568,15 @@ def with_new_output(self, name, connection_id, typeOptionId=None, format_option_
         :param append: whether the recipe should append or overwrite the output when running
             (note: not available for all dataset types)
         :param str object_type: DATASET or MANAGED_FOLDER
+        :param overwrite: If the dataset being created already exists, overwrite it (and delete data)
         """
         if object_type == 'DATASET':
             assert self.create_output_dataset is None
+
+            dataset = self.project.get_dataset(name)
+            if overwrite and dataset.exists():
+                dataset.delete(drop_data=True)
+
             self.create_output_dataset = True
             self.output_dataset_settings = {'connectionId':connection_id,'typeOptionId':typeOptionId,'specificSettings':{'formatOptionId':format_option_id, 'overrideSQLSchema':override_sql_schema},'partitioningOptionId':partitioning_option_id}
             self._with_output(name, append)
@@ -952,7 +961,7 @@ def with_new_output_dataset(self, name, connection,
             Use None for not partitioning the output, "FIRST_INPUT" to copy from the first input of the recipe,
             "dataset:XXX" to copy from a dataset name, or "folder:XXX" to copy from a folder id
         :param append: whether the recipe should append or overwrite the output when running (note: not available for all dataset types)
-        :param overwrite: If the object being created already exists, overwrite it
+        :param overwrite: If the dataset being created already exists, overwrite it (and delete data)
         """
 
         ch = self.project.new_managed_dataset_creation_helper(name)
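Putting the pieces together, a hedged end-to-end sketch of the new overwrite behaviour. The connection name, format option, group-by column and dataset names are placeholders chosen for illustration:

    # Continuing with the placeholder project from the earlier sketches.
    dataset = project.get_dataset("mydataset")

    builder = dataset.new_grouping_recipe("customer_id")   # placeholder group-by column
    builder.set_name("group_mydataset")

    # overwrite=True now drops a pre-existing output dataset (and its data)
    # before the recipe is created, instead of failing on the name clash.
    builder.with_new_output("mydataset_grouped", "filesystem_managed",
                            format_option_id="CSV_ESCAPING_NOGZIP_FORHIVE",
                            overwrite=True)

    recipe = builder.create()

Note that the drop happens client-side, through the new DSSDataset.exists() and delete(drop_data=True) calls, before the recipe creation request is sent.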
