Skip to content

Commit 5656ca1

Browse files
committed
Helpers to create a managed dataset
1 parent 4ab2bd3 commit 5656ca1

File tree

2 files changed

+58
-1
lines changed

2 files changed

+58
-1
lines changed

dataikuapi/dss/dataset.py

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -254,3 +254,51 @@ def get_object_discussions(self):
254254
:rtype: :class:`dataikuapi.discussion.DSSObjectDiscussions`
255255
"""
256256
return DSSObjectDiscussions(self.client, self.project_key, "DATASET", self.dataset_name)
257+
258+
class DSSManagedDatasetCreationHelper(object):
    """
    Fluent helper that accumulates the creation settings of a new managed dataset
    and then submits them to the backend.

    The ``with_*`` methods only update the local settings and return ``self`` so that
    calls can be chained. No request is sent until :meth:`create` is called.
    """

    def __init__(self, project, dataset_name):
        """
        :param project: Handle on the project in which to create the dataset. It must expose
                        ``client`` and ``project_key`` attributes, which :meth:`create` uses
        :param str dataset_name: Name of the dataset to create
        """
        self.project = project
        self.dataset_name = dataset_name
        # "specificSettings" is pre-created so that with_store_into can write into it directly
        self.creation_settings = {"specificSettings": {}}

    def get_creation_settings(self):
        """
        Gets the creation settings accumulated so far

        :return: The settings dict held by this helper (the live object, not a copy)
        :rtype: dict
        """
        return self.creation_settings

    def with_store_into(self, connection, type_option_id=None, format_option_id=None):
        """
        Sets the connection into which to store the new managed dataset

        :param str connection: Name of the connection to store into
        :param str type_option_id: If the connection accepts several types of datasets, the identifier
                                   of the type to use. Left unset when None
        :param str format_option_id: Optional identifier of a file format option. Left unset when None
        :return: self
        """
        self.creation_settings["connectionId"] = connection
        if type_option_id is not None:
            self.creation_settings["typeOptionId"] = type_option_id
        if format_option_id is not None:
            self.creation_settings["specificSettings"]["formatOptionId"] = format_option_id
        return self

    def with_copy_partitioning_from(self, dataset_ref):
        """
        Sets the new managed dataset to use the same partitioning as an existing dataset

        :param str dataset_ref: Name of the dataset to copy partitioning from
        :return: self
        """
        # The option belongs in the settings dict; the helper itself does not support item assignment
        self.creation_settings["partitioningOptionId"] = "copy:%s" % dataset_ref
        return self

    def create(self):
        """
        Executes the creation of the managed dataset according to the selected options

        :return: The :class:`DSSDataset` corresponding to the newly created dataset
        """
        self.project.client._perform_json(
            "POST", "/projects/%s/datasets/managed" % self.project.project_key,
            body={
                "name": self.dataset_name,
                "creationSettings": self.creation_settings
            })
        return DSSDataset(self.project.client, self.project.project_key, self.dataset_name)

dataikuapi/dss/project.py

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
import time
2-
from .dataset import DSSDataset
2+
from .dataset import DSSDataset, DSSManagedDatasetCreationHelper
33
from .recipe import DSSRecipe
44
from .managedfolder import DSSManagedFolder
55
from .savedmodel import DSSSavedModel
@@ -239,6 +239,15 @@ def create_dataset(self, dataset_name, type,
239239
body = obj)
240240
return DSSDataset(self.client, self.project_key, dataset_name)
241241

242+
def new_managed_dataset_creation_helper(self, dataset_name):
    """
    Builds a helper object used to create a managed dataset in this project

    :param string dataset_name: Name of the new dataset - must be unique in the project
    :return: A :class:`dataikuapi.dss.dataset.DSSManagedDatasetCreationHelper` object to create the managed dataset
    """
    # The helper receives this project handle and the requested dataset name
    helper = DSSManagedDatasetCreationHelper(self, dataset_name)
    return helper
250+
242251
########################################################
243252
# ML
244253
########################################################

0 commit comments

Comments
 (0)