Port from kiel: "Overwrite" flag when creating managed dataset

cstenac · cstenac · commit b2fada34008c · 2020-04-20T15:06:50.000+02:00
diff --git a/dataikuapi/dss/dataset.py b/dataikuapi/dss/dataset.py
@@ -705,15 +705,27 @@ def with_copy_partitioning_from(self, dataset_ref, object_type='DATASET'):
         self.creation_settings["partitioningOptionId"] = "copy:%s:%s" % (code, dataset_ref)
         return self
 
-    def create(self):
+    def create(self, overwrite=False):
         """
         Executes the creation of the managed dataset according to the selected options
-
+        :param overwrite: If the dataset being created already exists, delete it first (removing data)
         :return: The :class:`DSSDataset` corresponding to the newly created dataset
         """
+        if overwrite and self.already_exists():
+            self.project.get_dataset(self.dataset_name).delete(drop_data = True)
+
         self.project.client._perform_json("POST", "/projects/%s/datasets/managed" % self.project.project_key,
             body = {
                 "name": self.dataset_name,
                 "creationSettings":  self.creation_settings
         })
-        return DSSDataset(self.project.client, self.project.project_key, self.dataset_name)
+        return DSSDataset(self.project.client, self.project.project_key, self.dataset_name)
+
+    def already_exists(self):
+        """Returns whether this managed dataset already exists (any error while fetching its metadata is treated as not existing)"""
+        dataset = self.project.get_dataset(self.dataset_name)
+        try:
+            dataset.get_metadata()
+            return True
+        except Exception as e:
+            return False
diff --git a/dataikuapi/dss/recipe.py b/dataikuapi/dss/recipe.py
@@ -937,7 +937,7 @@ def with_script(self, script):
     def with_new_output_dataset(self, name, connection,
                                 type=None, format=None,
                                 copy_partitioning_from="FIRST_INPUT",
-                                append=False):
+                                append=False, overwrite=False):
         """
         Create a new managed dataset as output to the recipe-to-be-created. The dataset is created immediately
 
@@ -952,6 +952,7 @@ def with_new_output_dataset(self, name, connection,
                     Use None for not partitioning the output, "FIRST_INPUT" to copy from the first input of the recipe,
                     "dataset:XXX" to copy from a dataset name, or "folder:XXX" to copy from a folder id
         :param append: whether the recipe should append or overwrite the output when running (note: not available for all dataset types)
+        :param overwrite: If the dataset being created already exists, delete it first (removing its data) before recreating it
         """
 
         ch = self.project.new_managed_dataset_creation_helper(name)
@@ -967,7 +968,7 @@ def with_new_output_dataset(self, name, connection,
         elif copy_partitioning_from is not None:
             self.creation_settings["partitioningOptionId"] = "copy:%s" % copy_partitioning_from
 
-        ch.create()
+        ch.create(overwrite=overwrite)
 
         self.with_output(name, append=append)
         return self