@@ -688,16 +688,18 @@ def __init__(self, name, project):
688688
def with_group_key(self, group_key):
    """
    Set a column as the first grouping key.

    Only a single grouping key may be set at recipe creation time. To add
    further grouping keys, retrieve the recipe settings after creation.

    :param str group_key: name of a column in the input dataset
    :returns: this creator object, so that calls can be chained
    """
    # Only recorded here; it is copied into the creation settings when
    # the creation settings are finalized.
    self.group_key = group_key
    return self
697698
def _finish_creation_settings(self):
    """
    Finalize the creation settings, adding the grouping key when one was set.

    The parent class finalization runs first. The 'groupKey' entry is only
    written when a grouping key is defined (i.e. is not None), so that no
    null key is put in the creation settings.
    """
    super(GroupingRecipeCreator, self)._finish_creation_settings()

    key = self.group_key
    if key is None:
        # No grouping key requested: leave the creation settings untouched.
        return
    self.creation_settings['groupKey'] = key
701703
702704
703705class WindowRecipeSettings (DSSRecipeSettings ):
@@ -798,9 +800,85 @@ def __init__(self, name, project):
class JoinRecipeSettings(DSSRecipeSettings):
    """
    Settings of a join recipe. Do not create this directly, use :meth:`DSSRecipe.get_settings`

    In order to enable self-joins, join recipes are based on a concept of "virtual inputs".
    Every join, computed pre-join column, pre-join filter, ... is based on one virtual input, and
    each virtual input references an input of the recipe, by index.

    For example, if a recipe has inputs A and B and declares two joins:

        - A->B
        - A->A (based on a computed column)

    There are 3 virtual inputs:

        * 0: points to recipe input 0 (i.e. dataset A)
        * 1: points to recipe input 1 (i.e. dataset B)
        * 2: points to recipe input 0 (i.e. dataset A) and includes the computed column

    The joins are then declared as follows:

        * The first join is between virtual inputs 0 and 1
        * The second join is between virtual inputs 0 and 2
    """

    @property
    def raw_virtual_inputs(self):
        """
        Returns the raw list of virtual inputs, read from the "virtualInputs"
        entry of the recipe JSON payload.

        If the payload has no "virtualInputs" entry, the lookup error is
        propagated (a KeyError for a plain dict payload).

        :rtype: list of dict
        """
        return self.get_json_payload()["virtualInputs"]

    @property
    def raw_joins(self):
        """
        Returns the raw list of joins, read from the "joins" entry of the
        recipe JSON payload.

        If the payload has no "joins" entry, the lookup error is propagated
        (a KeyError for a plain dict payload).

        :rtype: list of dict
        """
        return self.get_json_payload()["joins"]

    def add_virtual_input(self, input_dataset_index):
        """
        Adds a virtual input pointing to the specified input dataset of the recipe
        (referenced by index in the inputs list).

        The "virtualInputs" list is created in the payload if it does not exist yet.

        :param int input_dataset_index: index of the dataset in the recipe inputs list
        """
        # setdefault mirrors add_join: tolerate a payload that does not yet
        # contain the list instead of failing on the missing key.
        # NOTE(review): assumes get_json_payload() returns the stored payload
        # (not a copy), as add_join already relies on.
        payload = self.get_json_payload()
        payload.setdefault("virtualInputs", []).append({"index": input_dataset_index})

    def add_join(self, join_type="LEFT", input1=0, input2=1):
        """
        Adds a join between two virtual inputs. The join is initialized with no condition.

        Use :meth:`add_condition_to_join` on the return value to add a join condition
        (for example column equality) to the join.

        :param str join_type: type of the join, stored as-is in the "type" field of the join
        :param int input1: index of the first virtual input of the join
        :param int input2: index of the second virtual input of the join
        :returns: the newly added join as a dict
        :rtype: dict
        """
        join = {
            "conditionsMode": "AND",
            "on": [],
            "table1": input1,
            "table2": input2,
            "type": join_type,
        }
        # The "joins" list is created on first use.
        self.get_json_payload().setdefault("joins", []).append(join)
        return join

    def add_condition_to_join(self, join, type="EQ", column1=None, column2=None):
        """
        Adds a condition to a join.

        The condition is appended to the "on" list of the join, and references
        the two virtual inputs ("table1" and "table2") of that join.

        :param dict join: a join, as returned by :meth:`add_join`
        :param str type: type of the condition, stored as-is in the "type" field of the condition
        :param str column1: Name of "left" column
        :param str column2: Name of "right" column
        :returns: the newly added condition as a dict
        :rtype: dict
        """
        # `type` shadows the builtin, but it is part of the public signature
        # (callers may pass it by keyword), so the name is kept.
        cond = {
            "type": type,
            "column1": {"name": column1, "table": join["table1"]},
            "column2": {"name": column2, "table": join["table2"]},
        }
        join["on"].append(cond)
        return cond
881+
804882class JoinRecipeCreator (VirtualInputsSingleOutputRecipeCreator ):
805883 """
806884 Create a Join recipe
0 commit comments