Skip to content

Commit 2c154fa

Browse files
committed
Add project.new_recipe() to automatically return a typed recipe creator + add prepare recipe creator
1 parent 5568bb9 commit 2c154fa

File tree

3 files changed

+87
-11
lines changed

3 files changed

+87
-11
lines changed

dataikuapi/dss/dataset.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -481,7 +481,7 @@ def new_code_recipe(self, type, code=None, recipe_name=None):
481481

482482
def new_grouping_recipe(self, first_group_by, recipe_name=None):
483483
if recipe_name is None:
484-
recipe_name = "grouping_recipe_from_%s" % (self.dataset_name)
484+
recipe_name = "group_%s" % (self.dataset_name)
485485
builder = recipe.GroupingRecipeCreator(recipe_name, self.project)
486486
builder.with_input(self.dataset_name)
487487
builder.with_group_key(first_group_by)

dataikuapi/dss/project.py

Lines changed: 65 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,7 @@
11
import time, warnings, sys, os.path as osp
22
from .dataset import DSSDataset, DSSManagedDatasetCreationHelper
33
from .recipe import DSSRecipe
4+
from . import recipe
45
from .managedfolder import DSSManagedFolder
56
from .savedmodel import DSSSavedModel
67
from .job import DSSJob, DSSJobWaiter
@@ -829,13 +830,9 @@ def list_recipes(self):
829830

830831
def get_recipe(self, recipe_name):
831832
"""
832-
Get a handle to interact with a specific recipe
833-
834-
Args:
835-
recipe_name: the name of the desired recipe
836-
837-
Returns:
838-
A :class:`dataikuapi.dss.recipe.DSSRecipe` recipe handle
833+
Gets a :class:`dataikuapi.dss.recipe.DSSRecipe` handle to interact with a recipe
834+
:param str recipe_name: The name of the recipe
835+
:rtype: :class:`dataikuapi.dss.recipe.DSSRecipe`
839836
"""
840837
return DSSRecipe(self.client, self.project_key, recipe_name)
841838

@@ -862,6 +859,67 @@ def create_recipe(self, recipe_proto, creation_settings):
862859
body = definition)['name']
863860
return DSSRecipe(self.client, self.project_key, recipe_name)
864861

862+
def new_recipe(self, type, name):
863+
"""
864+
Initializes the creation of a new recipe. Returns a :class:`dataikuapi.dss.recipe.DSSRecipeCreator`
865+
or one of its subclasses to complete the creation of the recipe.
866+
867+
Usage example:
868+
869+
.. code-block:: python
870+
871+
grouping_recipe_builder = project.new_recipe("grouping", "my_grouping_recipe")
872+
grouping_recipe_builder.with_input("dataset_to_group_on")
873+
# Create a new managed dataset for the output in the "filesystem_managed" connection
874+
grouping_recipe_builder.with_new_output("grouped_dataset", "filesystem_managed")
875+
grouping_recipe_builder.with_group_key("column")
876+
recipe = grouping_recipe_builder.build()
877+
878+
# After the recipe is created, you can edit its settings
879+
recipe_settings = recipe.get_settings()
880+
recipe_settings.set_column_aggregations("value", sum=True)
881+
recipe_settings.save()
882+
883+
# And you may need to apply new schemas to the outputs
884+
recipe.compute_schema_updates().apply()
885+
886+
:param str type: Type of the recipe
887+
:rtype: :class:`dataikuapi.dss.recipe.DSSRecipeCreator`
888+
"""
889+
890+
if type == "grouping":
891+
return recipe.GroupingRecipeCreator(name, self)
892+
elif type == "window":
893+
return recipe.WindowRecipeCreator(name, self)
894+
elif type == "sync":
895+
return recipe.SyncRecipeCreator(name, self)
896+
elif type == "sort":
897+
return recipe.SortRecipeCreator(name, self)
898+
elif type == "topn":
899+
return recipe.TopNRecipeCreator(name, self)
900+
elif type == "distinct":
901+
return recipe.DistinctRecipeCreator(name, self)
902+
elif type == "join":
903+
return recipe.JoinRecipeCreator(name, self)
904+
elif type == "vstack":
905+
return recipe.StackRecipeCreator(name, self)
906+
elif type == "sampling":
907+
return recipe.SamplingRecipeCreator(name, self)
908+
elif type == "split":
909+
return recipe.SplitRecipeCreator(name, self)
910+
elif type == "prepare" or type == "shaker":
911+
return recipe.PrepareRecipeCreator(name, self)
912+
elif type == "prediction_scoring":
913+
return recipe.PredictionScoringRecipeCreator(name, self)
914+
elif type == "clustering_scoring":
915+
return recipe.ClusteringScoringRecipeCreator(name, self)
916+
elif type == "download":
917+
return recipe.DownloadRecipeCreator(name, self)
918+
elif type == "sql_query":
919+
return recipe.SQLQueryRecipeCreator(name, self)
920+
elif type in ["python", "r", "sql_script", "pyspark", "sparkr", "spark_scala", "shell"]:
921+
return recipe.CodeRecipeCreator(name, type, self)
922+
865923
########################################################
866924
# Flow
867925
########################################################

dataikuapi/dss/recipe.py

Lines changed: 21 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -17,7 +17,7 @@ def __init__(self, client, project_key, recipe_name):
1717
# Dataset deletion
1818
########################################################
1919

20-
def compute_schema_update(self):
20+
def compute_schema_updates(self):
2121
"""
2222
Computes which updates are required to the outputs of this recipe.
2323
The required updates are returned as a :class:`RequiredSchemaUpdates` object, which then
@@ -27,7 +27,7 @@ def compute_schema_update(self):
2727
2828
.. code-block:: python
2929
30-
required_updates = recipe.compute_schema_update()
30+
required_updates = recipe.compute_schema_updates()
3131
if required_updates.any_action_required():
3232
print("Some schemas will be updated")
3333
@@ -318,14 +318,25 @@ def replace_output(self, current_output_ref, new_output_ref):
318318

319319
# Old name
320320
class DSSRecipeDefinitionAndPayload(DSSRecipeSettings):
321+
"""
322+
Deprecated. Settings of a recipe. Do not create this directly, use :meth:`DSSRecipe.get_settings`
323+
"""
321324
pass
322325

323326
class GroupingRecipeSettings(DSSRecipeSettings):
327+
"""
328+
Settings of a grouping recipe. Do not create this directly, use :meth:`DSSRecipe.get_settings`
329+
"""
324330
def clear_grouping_keys(self):
331+
"""Removes all grouping keys from this grouping recipe"""
325332
self._payload_to_obj()
326333
self.obj_payload["keys"] = []
327334

328335
def add_grouping_key(self, column):
336+
"""
337+
Adds grouping on a column
338+
:param str column: Column to group on
339+
"""
329340
self._payload_to_obj()
330341
self.obj_payload["keys"].append({"column":column})
331342

@@ -584,6 +595,13 @@ class DistinctRecipeCreator(SingleOutputRecipeCreator):
584595
def __init__(self, name, project):
585596
SingleOutputRecipeCreator.__init__(self, 'distinct', name, project)
586597

598+
class PrepareRecipeCreator(SingleOutputRecipeCreator):
599+
"""
600+
Create a Prepare recipe
601+
"""
602+
def __init__(self, name, project):
603+
SingleOutputRecipeCreator.__init__(self, 'shaker', name, project)
604+
587605
class GroupingRecipeCreator(SingleOutputRecipeCreator):
588606
"""
589607
Create a Group recipe
@@ -782,7 +800,7 @@ def __init__(self, name, project):
782800
class RequiredSchemaUpdates(object):
783801
"""
784802
Representation of the updates required to the schema of the outputs of a recipe.
785-
Do not create this class directly, use :meth:`DSSRecipe.compute_schema_update`
803+
Do not create this class directly, use :meth:`DSSRecipe.compute_schema_updates`
786804
"""
787805

788806
def __init__(self, recipe, data):

0 commit comments

Comments
 (0)