Skip to content

Commit 90869f4

Browse files
committed
doc recipe creation
1 parent fdb0c15 commit 90869f4

File tree

1 file changed

+136
-3
lines changed

1 file changed

+136
-3
lines changed

dataikuapi/dss/recipe.py

Lines changed: 136 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -85,32 +85,64 @@ def __init__(self, data):
8585
self.data = data
8686

8787
def get_recipe_raw_definition(self):
    """
    Get the recipe definition as a raw JSON object

    :returns: the value stored under the ``recipe`` key of the underlying data,
        or None when that key is absent
    """
    definition = self.data.get('recipe', None)
    return definition
8992

9093
def get_recipe_inputs(self):
    """
    Get the list of inputs of this recipe

    :returns: the ``inputs`` entry of the recipe definition
    """
    recipe = self.data.get('recipe')
    return recipe.get('inputs')
9298

9399
def get_recipe_outputs(self):
    """
    Get the list of outputs of this recipe

    :returns: the ``outputs`` entry of the recipe definition
    """
    recipe = self.data.get('recipe')
    return recipe.get('outputs')
95104

96105
def get_recipe_params(self):
    """
    Get the parameters of this recipe, as a raw JSON object

    :returns: the ``params`` entry of the recipe definition
    """
    recipe = self.data.get('recipe')
    return recipe.get('params')
98110

99111
def get_payload(self):
    """
    Get the payload or script of this recipe, as a raw string

    :returns: the value stored under the ``payload`` key of the underlying data,
        or None when that key is absent
    """
    payload = self.data.get('payload', None)
    return payload
101116

102117
def get_json_payload(self):
    """
    Get the payload or script of this recipe, as a JSON object

    :returns: the payload parsed from its JSON string form, or None when this
        recipe has no payload
    """
    payload = self.data.get('payload', None)
    if payload is None:
        # json.loads(None) raises TypeError; mirror get_payload() and return None instead
        return None
    return json.loads(payload)
104122

105123
def set_payload(self, payload):
    """
    Set the raw payload of this recipe

    The value is stored as-is under the ``payload`` key of the underlying data.

    :param str payload: the payload, as a string
    """
    data = self.data
    data['payload'] = payload
107130

108131
def set_json_payload(self, payload):
    """
    Set the payload of this recipe from a JSON-serializable object

    :param dict payload: the payload, as a dict. The payload will be converted to a JSON string internally
    """
    # The payload is always stored in its string form, so serialize before storing
    serialized = json.dumps(payload)
    self.data['payload'] = serialized
110138

111139
class DSSRecipeCreator(object):
112140
"""
113141
Helper to create new recipes
142+
143+
:param str type: type of the recipe
144+
:param str name: name for the recipe
145+
:param dataikuapi.dss.project.DSSProject project: project in which the recipe will be created
114146
"""
115147
def __init__(self, type, name, project):
116148
self.project = project
@@ -146,11 +178,27 @@ def _with_output(self, dataset_name, append=False, role="main"):
146178
return self
147179

148180
def with_input(self, dataset_name, project_key=None, role="main"):
    """
    Add an existing object as input to the recipe-to-be-created

    This is the public entry point; the work is delegated to ``_with_input``
    and its result is returned unchanged.

    :param dataset_name: name of the dataset, or identifier of the managed folder
                         or identifier of the saved model
    :param project_key: project containing the object, if different from the one where the recipe is created
    :param str role: the role of the recipe in which the input should be added
    """
    result = self._with_input(dataset_name, project_key, role)
    return result
150190

151191
def with_output(self, dataset_name, append=False, role="main"):
    """
    Add an existing object as output to the recipe-to-be-created

    The output dataset must already exist. If you are creating a visual recipe with a single
    output, use with_existing_output

    :param dataset_name: name of the dataset, or identifier of the managed folder
                         or identifier of the saved model
    :param append: whether the recipe should append or overwrite the output when running
                   (note: not available for all dataset types)
    :param str role: the role of the recipe in which the output should be added
    """
    return self._with_output(dataset_name, append=append, role=role)
155203

156204
def build(self):
@@ -167,6 +215,10 @@ def _finish_creation_settings(self):
167215
pass
168216

169217
class SingleOutputRecipeCreator(DSSRecipeCreator):
218+
"""
219+
Create a recipe that has a single output
220+
"""
221+
170222
def __init__(self, type, name, project):
171223
DSSRecipeCreator.__init__(self, type, name, project)
172224
self.create_output_dataset = None
@@ -175,12 +227,38 @@ def __init__(self, type, name, project):
175227
self.output_folder_settings = None
176228

177229
def with_existing_output(self, dataset_name, append=False):
    """
    Add an existing object as output to the recipe-to-be-created

    An output may only be chosen once: an assertion fails if the output
    was already configured on this creator.

    :param dataset_name: name of the dataset, or identifier of the managed folder
                         or identifier of the saved model
    :param append: whether the recipe should append or overwrite the output when running
                   (note: not available for all dataset types)
    :returns: this creator, to allow chaining
    """
    assert self.create_output_dataset is None
    # False records that the output already exists and must not be created with the recipe
    self.create_output_dataset = False
    self._with_output(dataset_name, append)
    return self
182242

183243
def with_new_output(self, name, connection_id, typeOptionId=None, format_option_id=None, override_sql_schema=None, partitioning_option_id=None, append=False, object_type='DATASET'):
244+
"""
245+
Create a new dataset as output to the recipe-to-be-created. The dataset is not created immediately,
246+
but when the recipe is created (i.e. in the build() method)
247+
248+
:param str name: name of the dataset or identifier of the managed folder
249+
:param str connection_id: name of the connection to create the dataset on
250+
:param str typeOptionId: sub-type of dataset, for connection where the type could be ambiguous. Typically,
251+
this is SCP or SFTP, for SSH connection
252+
:param str format_option_id: name of a format preset relevant for the dataset type. Possible values are: CSV_ESCAPING_NOGZIP_FORHIVE,
253+
CSV_UNIX_GZIP, CSV_EXCEL_GZIP, CSV_EXCEL_GZIP_BIGQUERY, CSV_NOQUOTING_NOGZIP_FORPIG, PARQUET_HIVE,
254+
AVRO, ORC
255+
:param override_sql_schema: schema to force on the dataset, for SQL datasets. If left empty, it will be autodetected
256+
:param str partitioning_option_id: to copy the partitioning schema of an existing dataset 'foo', pass a
257+
value of 'copy:foo'
258+
:param append: whether the recipe should append or overwrite the output when running
259+
(note: not available for all dataset types)
260+
:param str object_type: DATASET or MANAGED_FOLDER
261+
"""
184262
if object_type == 'DATASET':
185263
assert self.create_output_dataset is None
186264
self.create_output_dataset = True
@@ -194,6 +272,7 @@ def with_new_output(self, name, connection_id, typeOptionId=None, format_option_
194272
return self
195273

196274
def with_output(self, dataset_name, append=False):
    """
    Alias of with_existing_output

    :param dataset_name: forwarded unchanged to with_existing_output
    :param append: forwarded unchanged to with_existing_output
    """
    return self.with_existing_output(dataset_name, append=append)
198277

199278
def _finish_creation_settings(self):
@@ -203,6 +282,10 @@ def _finish_creation_settings(self):
203282
self.creation_settings['outputFolderSettings'] = self.output_folder_settings
204283

205284
class VirtualInputsSingleOutputRecipeCreator(SingleOutputRecipeCreator):
285+
"""
286+
Create a recipe that has a single output and several inputs
287+
"""
288+
206289
def __init__(self, type, name, project):
207290
SingleOutputRecipeCreator.__init__(self, type, name, project)
208291
self.virtual_inputs = []
@@ -221,31 +304,54 @@ def _finish_creation_settings(self):
221304
#
222305
########################
223306
class WindowRecipeCreator(SingleOutputRecipeCreator):
    """
    Create a Window recipe

    :param str name: name for the recipe
    :param project: project in which the recipe will be created
    """
    def __init__(self, name, project):
        # The recipe type is fixed to 'window'; the parent creator handles the rest
        SingleOutputRecipeCreator.__init__(self, type='window', name=name, project=project)
226312

227313
class SyncRecipeCreator(SingleOutputRecipeCreator):
    """
    Create a Sync recipe

    :param str name: name for the recipe
    :param project: project in which the recipe will be created
    """
    def __init__(self, name, project):
        # The recipe type is fixed to 'sync'; the parent creator handles the rest
        SingleOutputRecipeCreator.__init__(self, type='sync', name=name, project=project)
230319

231320
class SortRecipeCreator(SingleOutputRecipeCreator):
    """
    Create a Sort recipe

    :param str name: name for the recipe
    :param project: project in which the recipe will be created
    """
    def __init__(self, name, project):
        # The recipe type is fixed to 'sort'; the parent creator handles the rest
        SingleOutputRecipeCreator.__init__(self, type='sort', name=name, project=project)
234326

235327
class TopNRecipeCreator(DSSRecipeCreator):
    """
    Create a TopN recipe

    :param str name: name for the recipe
    :param project: project in which the recipe will be created
    """
    def __init__(self, name, project):
        # The recipe type is fixed to 'topn'; the parent creator handles the rest
        DSSRecipeCreator.__init__(self, type='topn', name=name, project=project)
238333

239334
class DistinctRecipeCreator(SingleOutputRecipeCreator):
    """
    Create a Distinct recipe

    :param str name: name for the recipe
    :param project: project in which the recipe will be created
    """
    def __init__(self, name, project):
        # The recipe type is fixed to 'distinct'; the parent creator handles the rest
        SingleOutputRecipeCreator.__init__(self, type='distinct', name=name, project=project)
242340

243341
class GroupingRecipeCreator(SingleOutputRecipeCreator):
342+
"""
343+
Create a Group recipe
344+
"""
244345
def __init__(self, name, project):
    # The recipe type is fixed to 'grouping'; the parent creator handles the rest
    SingleOutputRecipeCreator.__init__(self, type='grouping', name=name, project=project)
    # No grouping key until with_group_key() is called
    self.group_key = None
247348

248349
def with_group_key(self, group_key):
    """
    Set a column as grouping key

    :param str group_key: name of a column in the input
    :returns: this creator, to allow chaining
    """
    setattr(self, 'group_key', group_key)
    return self
251357

@@ -254,37 +360,61 @@ def _finish_creation_settings(self):
254360
self.creation_settings['groupKey'] = self.group_key
255361

256362
class JoinRecipeCreator(VirtualInputsSingleOutputRecipeCreator):
    """
    Create a Join recipe

    :param str name: name for the recipe
    :param project: project in which the recipe will be created
    """
    def __init__(self, name, project):
        # The recipe type is fixed to 'join'; the parent creator handles the rest
        VirtualInputsSingleOutputRecipeCreator.__init__(self, type='join', name=name, project=project)
259368

260369
class StackRecipeCreator(VirtualInputsSingleOutputRecipeCreator):
    """
    Create a Stack recipe

    :param str name: name for the recipe
    :param project: project in which the recipe will be created
    """
    def __init__(self, name, project):
        # The recipe type string used for Stack recipes is 'vstack'
        VirtualInputsSingleOutputRecipeCreator.__init__(self, type='vstack', name=name, project=project)
263375

264376
class SamplingRecipeCreator(SingleOutputRecipeCreator):
    """
    Create a Sample/Filter recipe

    :param str name: name for the recipe
    :param project: project in which the recipe will be created
    """
    def __init__(self, name, project):
        # The recipe type string used for Sample/Filter recipes is 'sampling'
        SingleOutputRecipeCreator.__init__(self, type='sampling', name=name, project=project)
267382

268383
class CodeRecipeCreator(DSSRecipeCreator):
    """
    Create a recipe running a script

    :param str name: name for the recipe
    :param str type: the type of the recipe (possible values : python, r, hive, impala, spark_scala, pyspark, sparkr)
    :param project: project in which the recipe will be created
    """
    def __init__(self, name, type, project):
        # NOTE: ``name`` comes before ``type`` here, whereas the parent constructor
        # takes (type, name, project); the arguments are reordered in the call below.
        DSSRecipeCreator.__init__(self, type, name, project)
        # No script until with_script() is called
        self.script = None

    def with_script(self, script):
        """
        Set the code of the recipe

        :param str script: the script of the recipe
        :returns: this creator, to allow chaining
        """
        self.script = script
        return self

    def _finish_creation_settings(self):
        """
        Complete the creation settings with the script of the recipe
        """
        super(CodeRecipeCreator, self)._finish_creation_settings()
        self.creation_settings['script'] = self.script
281405

282406

283407
class SQLQueryRecipeCreator(SingleOutputRecipeCreator):
    """
    Create a SQL query recipe

    :param str name: name for the recipe
    :param project: project in which the recipe will be created
    """
    def __init__(self, name, project):
        # The recipe type is fixed to 'sql_query'; the parent creator handles the rest
        SingleOutputRecipeCreator.__init__(self, type='sql_query', name=name, project=project)
286413

287414
class SplitRecipeCreator(DSSRecipeCreator):
415+
"""
416+
Create a Split recipe
417+
"""
288418
def __init__(self, name, project):
289419
DSSRecipeCreator.__init__(self, "split", name, project)
290420

@@ -356,5 +486,8 @@ def with_input_model(self, model_id):
356486

357487

358488
class DownloadRecipeCreator(SingleOutputRecipeCreator):
    """
    Create a Download recipe

    :param str name: name for the recipe
    :param project: project in which the recipe will be created
    """
    def __init__(self, name, project):
        # The recipe type is fixed to 'download'; the parent creator handles the rest
        SingleOutputRecipeCreator.__init__(self, type='download', name=name, project=project)

0 commit comments

Comments
 (0)