Skip to content

Commit 8463096

Browse files
committed
Start adding helpers to manage join recipes
1 parent 5763687 commit 8463096

File tree

3 files changed

+94
-12
lines changed

3 files changed

+94
-12
lines changed

dataikuapi/dss/dataset.py

Lines changed: 12 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -577,7 +577,11 @@ def get_as_core_dataset(self):
577577
########################################################
578578

579579
def new_code_recipe(self, type, code=None, recipe_name=None):
580-
"""Starts creation of a new code recipe taking this dataset as input"""
580+
"""
581+
Starts creation of a new code recipe taking this dataset as input
582+
:param str type: Type of the recipe ('python', 'r', 'pyspark', 'sparkr', 'sql', 'sparksql', 'hive', ...)
583+
:param str code: The code of the recipe
584+
"""
581585

582586
if type == "python":
583587
builder = recipe.PythonRecipeCreator(recipe_name, self.project)
@@ -588,15 +592,14 @@ def new_code_recipe(self, type, code=None, recipe_name=None):
588592
builder.with_script(code)
589593
return builder
590594

591-
def new_grouping_recipe(self, first_group_by, recipe_name=None):
592-
builder = recipe.GroupingRecipeCreator(recipe_name, self.project)
593-
builder.with_input(self.dataset_name)
594-
builder.with_group_key(first_group_by)
595-
return builder
596-
597595
def new_recipe(self, type, recipe_name=None):
598-
"""Starts creation of a new recipe taking this dataset as input"""
599-
builder = self.project.new_recipe(type, recipe_name)
596+
"""
597+
Starts creation of a new recipe taking this dataset as input.
598+
For more details, please see :meth:`dataikuapi.dss.project.DSSProject.new_recipe`
599+
600+
:param str type: Type of the recipe
601+
"""
602+
builder = self.project.new_recipe(type=type, name=recipe_name)
600603
builder.with_input(self.dataset_name)
601604
return builder
602605

dataikuapi/dss/project.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -892,6 +892,7 @@ def new_recipe(self, type, name=None):
892892
recipe.compute_schema_updates().apply()
893893
894894
:param str type: Type of the recipe
895+
:param str name: Optional, base name for the new recipe.
895896
:rtype: :class:`dataikuapi.dss.recipe.DSSRecipeCreator`
896897
"""
897898

dataikuapi/dss/recipe.py

Lines changed: 81 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -688,16 +688,18 @@ def __init__(self, name, project):
688688

689689
def with_group_key(self, group_key):
690690
"""
691-
Set a column as grouping key
691+
Set a column as the first grouping key. Only a single grouping key may be set
692+
at recipe creation time. For additional groupings, get the recipe settings
692693
693-
:param str group_key: name of a column in the input
694+
:param str group_key: name of a column in the input dataset
694695
"""
695696
self.group_key = group_key
696697
return self
697698

698699
def _finish_creation_settings(self):
699700
super(GroupingRecipeCreator, self)._finish_creation_settings()
700-
self.creation_settings['groupKey'] = self.group_key
701+
if self.group_key is not None:
702+
self.creation_settings['groupKey'] = self.group_key
701703

702704

703705
class WindowRecipeSettings(DSSRecipeSettings):
@@ -798,9 +800,85 @@ def __init__(self, name, project):
798800
class JoinRecipeSettings(DSSRecipeSettings):
799801
"""
800802
Settings of a join recipe. Do not create this directly, use :meth:`DSSRecipe.get_settings`
803+
804+
In order to enable self-joins, join recipes are based on a concept of "virtual inputs".
805+
Every join, computed pre-join column, pre-join filter, ... is based on one virtual input, and
806+
each virtual input references an input of the recipe, by index
807+
808+
For example, if a recipe has inputs A and B and declares two joins:
809+
- A->B
810+
- A->A(based on a computed column)
811+
812+
There are 3 virtual inputs:
813+
* 0: points to recipe input 0 (i.e. dataset A)
814+
* 1: points to recipe input 1 (i.e. dataset B)
815+
* 2: points to recipe input 0 (i.e. dataset A) and includes the computed column
816+
817+
* The first join is between virtual inputs 0 and 1
818+
* The second join is between virtual inputs 0 and 2
801819
"""
802820
pass # TODO: Write helpers for join
803821

822+
@property
823+
def raw_virtual_inputs(self):
824+
"""
825+
Returns the raw list of virtual inputs
826+
:rtype: list of dict
827+
"""
828+
return self.get_json_payload()["virtualInputs"]
829+
830+
@property
831+
def raw_joins(self):
832+
"""
833+
Returns the raw list of joins
834+
:rtype list of dict
835+
"""
836+
return self.get_json_payload()["joins"]
837+
838+
def add_virtual_input(self, input_dataset_index):
839+
"""
840+
Adds a virtual input pointing to the specified input dataset of the recipe
841+
(referenced by index in the inputs list)
842+
"""
843+
self.raw_virtual_inputs.append({"index": input_dataset_index})
844+
845+
def add_join(self, join_type="LEFT", input1=0, input2=1):
846+
"""
847+
Adds a join between two virtual inputs. The join is initialized with no condition.
848+
849+
Use :meth:`add_condition_to_join` on the return value to add a join condition (for example column equality)
850+
to the join
851+
852+
:returns the newly added join as a dict
853+
:rtype dict
854+
"""
855+
jp = self.get_json_payload()
856+
if not "joins" in jp:
857+
jp["joins"] = []
858+
join = {
859+
"conditionsMode": "AND",
860+
"on": [],
861+
"table1": input1,
862+
"table2": input2,
863+
"type": join_type
864+
}
865+
jp["joins"].append(join)
866+
return join
867+
868+
def add_condition_to_join(self, join, type="EQ", column1=None, column2=None):
869+
"""
870+
Adds a condition to a join
871+
:param str column1: Name of "left" column
872+
:param str column2: Name of "right" column
873+
"""
874+
cond = {
875+
"type" : type,
876+
"column1": {"name": column1, "table": join["table1"]},
877+
"column2": {"name": column2, "table": join["table2"]},
878+
}
879+
join["on"].append(cond)
880+
return cond
881+
804882
class JoinRecipeCreator(VirtualInputsSingleOutputRecipeCreator):
805883
"""
806884
Create a Join recipe

0 commit comments

Comments
 (0)