Skip to content

Commit b2fada3

Browse files
committed
Port from kiel: "Overwrite" flag when creating managed dataset
1 parent e950718 commit b2fada3

File tree

2 files changed

+18
-5
lines changed

2 files changed

+18
-5
lines changed

dataikuapi/dss/dataset.py

Lines changed: 15 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -705,15 +705,27 @@ def with_copy_partitioning_from(self, dataset_ref, object_type='DATASET'):
705705
self.creation_settings["partitioningOptionId"] = "copy:%s:%s" % (code, dataset_ref)
706706
return self
707707

708-
def create(self):
708+
def create(self, overwrite=False):
709709
"""
710710
Executes the creation of the managed dataset according to the selected options
711-
711+
:param overwrite: If the dataset being created already exists, delete it first (removing data)
712712
:return: The :class:`DSSDataset` corresponding to the newly created dataset
713713
"""
714+
if overwrite and self.already_exists():
715+
self.project.get_dataset(self.dataset_name).delete(drop_data = True)
716+
714717
self.project.client._perform_json("POST", "/projects/%s/datasets/managed" % self.project.project_key,
715718
body = {
716719
"name": self.dataset_name,
717720
"creationSettings": self.creation_settings
718721
})
719-
return DSSDataset(self.project.client, self.project.project_key, self.dataset_name)
722+
return DSSDataset(self.project.client, self.project.project_key, self.dataset_name)
723+
724+
def already_exists(self):
725+
"""Returns whether this managed dataset already exists"""
726+
dataset = self.project.get_dataset(self.dataset_name)
727+
try:
728+
dataset.get_metadata()
729+
return True
730+
except Exception as e:
731+
return False

dataikuapi/dss/recipe.py

Lines changed: 3 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -937,7 +937,7 @@ def with_script(self, script):
937937
def with_new_output_dataset(self, name, connection,
938938
type=None, format=None,
939939
copy_partitioning_from="FIRST_INPUT",
940-
append=False):
940+
append=False, overwrite=False):
941941
"""
942942
Create a new managed dataset as output to the recipe-to-be-created. The dataset is created immediately
943943
@@ -952,6 +952,7 @@ def with_new_output_dataset(self, name, connection,
952952
Use None for not partitioning the output, "FIRST_INPUT" to copy from the first input of the recipe,
953953
"dataset:XXX" to copy from a dataset name, or "folder:XXX" to copy from a folder id
954954
:param append: whether the recipe should append or overwrite the output when running (note: not available for all dataset types)
955+
:param overwrite: If the object being created already exists, overwrite it
955956
"""
956957

957958
ch = self.project.new_managed_dataset_creation_helper(name)
@@ -967,7 +968,7 @@ def with_new_output_dataset(self, name, connection,
967968
elif copy_partitioning_from is not None:
968969
self.creation_settings["partitioningOptionId"] = "copy:%s" % copy_partitioning_from
969970

970-
ch.create()
971+
ch.create(overwrite=overwrite)
971972

972973
self.with_output(name, append=append)
973974
return self

0 commit comments

Comments
 (0)