Skip to content

Commit 89b9358

Browse files
committed
api for analyses
1 parent 71a0efa commit 89b9358

File tree

2 files changed

+190
-0
lines changed

2 files changed

+190
-0
lines changed

dataikuapi/dss/analysis.py

Lines changed: 136 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,136 @@
1+
from ..utils import DataikuException
2+
from ..utils import DataikuUTF8CSVReader
3+
from ..utils import DataikuStreamedHttpUTF8CSVReader
4+
import json
5+
import time
6+
from .metrics import ComputedMetrics
7+
from .ml import DSSMLTask
8+
9+
class DSSAnalysis(object):
    """A handle to interact with a DSS visual analysis"""

    def __init__(self, client, project_key, analysis_id):
        """
        Store the coordinates of the visual analysis. No request is issued here.

        :param client: the API client used to perform the HTTP calls
        :param project_key: the key of the project containing the analysis
        :param analysis_id: the identifier of the visual analysis
        """
        self.client = client
        self.project_key = project_key
        self.analysis_id = analysis_id

    ########################################################
    # Internal helpers
    ########################################################

    def _lab_path(self, suffix=""):
        """
        Build the REST path of this analysis, optionally followed by ``suffix``.

        The trailing slash after the analysis id is always emitted, so an empty
        suffix yields ``/projects/<project_key>/lab/<analysis_id>/``.
        """
        return "/projects/%s/lab/%s/%s" % (self.project_key, self.analysis_id, suffix)

    def _create_ml_task(self, creation_settings):
        """
        POST ``creation_settings`` to the ML tasks endpoint of this analysis and
        wrap the ``mlTaskId`` field of the response in a :class:`DSSMLTask` handle.
        """
        ref = self.client._perform_json("POST", self._lab_path("models/"), body=creation_settings)
        return DSSMLTask(self.client, self.project_key, self.analysis_id, ref["mlTaskId"])

    ########################################################
    # Analysis deletion
    ########################################################

    def delete(self, drop_data=False):
        """
        Delete this visual analysis

        :param bool drop_data: accepted for signature compatibility; this method
            does not currently forward it to the DELETE request
        """
        return self.client._perform_empty("DELETE", self._lab_path())

    ########################################################
    # Analysis definition
    ########################################################

    def get_definition(self):
        """
        Get the definition of the analysis

        Returns:
            the definition, as a JSON object
        """
        return self.client._perform_json("GET", self._lab_path())

    def set_definition(self, definition):
        """
        Set the definition of the analysis

        Args:
            definition: the definition, as a JSON object. You should only set a definition object
            that has been retrieved using the get_definition call.
        """
        return self.client._perform_json("PUT", self._lab_path(), body=definition)

    ########################################################
    # ML
    ########################################################

    def create_prediction_ml_task(self, target_variable,
                                  ml_backend_type="PY_MEMORY",
                                  guess_policy="DEFAULT"):
        """Creates a new prediction task in this visual analysis lab
        for a dataset.

        The returned ML task will be in 'guessing' state, i.e. analyzing
        the input dataset to determine feature handling and algorithms.

        You should wait for the guessing to be completed by calling
        ``wait_guess_complete`` on the returned object before doing anything
        else (in particular calling ``train`` or ``get_settings``)

        :param string target_variable: the variable to predict, sent as ``targetVariable``
        :param string ml_backend_type: ML backend to use, one of PY_MEMORY, MLLIB or H2O
        :param string guess_policy: Policy to use for setting the default parameters. Valid values are: DEFAULT, SIMPLE_FORMULA, DECISION_TREE, EXPLANATORY and PERFORMANCE
        :returns: a :class:`DSSMLTask` handle on the newly created ML task
        """
        return self._create_ml_task({
            "taskType": "PREDICTION",
            "targetVariable": target_variable,
            "backendType": ml_backend_type,
            "guessPolicy": guess_policy,
        })

    def create_clustering_ml_task(self,
                                  ml_backend_type="PY_MEMORY",
                                  guess_policy="KMEANS"):
        """Creates a new clustering task in this visual analysis lab
        for a dataset.

        The returned ML task will be in 'guessing' state, i.e. analyzing
        the input dataset to determine feature handling and algorithms.

        You should wait for the guessing to be completed by calling
        ``wait_guess_complete`` on the returned object before doing anything
        else (in particular calling ``train`` or ``get_settings``)

        :param string ml_backend_type: ML backend to use, one of PY_MEMORY, MLLIB or H2O
        :param string guess_policy: Policy to use for setting the default parameters. Valid values are: KMEANS and ANOMALY_DETECTION
        :returns: a :class:`DSSMLTask` handle on the newly created ML task
        """
        return self._create_ml_task({
            "taskType": "CLUSTERING",
            "backendType": ml_backend_type,
            "guessPolicy": guess_policy,
        })

    def list_ml_tasks(self):
        """
        List the ML tasks in this visual analysis

        Returns:
            the list of the ML tasks summaries, each one as a JSON object
        """
        return self.client._perform_json("GET", self._lab_path("models/"))

    def get_ml_task(self, mltask_id):
        """
        Get a handle to interact with a specific ML task

        Args:
            mltask_id: the identifier of the desired ML task

        Returns:
            A :class:`dataikuapi.dss.ml.DSSMLTask` ML task handle
        """
        return DSSMLTask(self.client, self.project_key, self.analysis_id, mltask_id)
136+

dataikuapi/dss/project.py

Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
from .notebook import DSSNotebook
1313
from .macro import DSSMacro
1414
from .ml import DSSMLTask
15+
from .analysis import DSSAnalysis
1516
from dataikuapi.utils import DataikuException
1617

1718

@@ -232,10 +233,63 @@ def create_clustering_ml_task(self, input_dataset,
232233
ref = self.client._perform_json("POST", "/projects/%s/models/lab/" % self.project_key, body=obj)
233234
return DSSMLTask(self.client, self.project_key, ref["analysisId"], ref["mlTaskId"])
234235

236+
def list_ml_tasks(self):
    """
    Fetch the summaries of the ML tasks of this project.

    Issues a GET on the project's ``models/lab/`` endpoint.

    Returns:
        the decoded JSON response: the list of ML task summaries,
        each one as a JSON object
    """
    endpoint = "/projects/%s/models/lab/" % self.project_key
    return self.client._perform_json("GET", endpoint)
244+
235245
def get_ml_task(self, analysis_id, mltask_id):
    """
    Build a handle on one ML task of this project.

    Only constructs the handle object from the given identifiers.

    Args:
        analysis_id: the identifier of the visual analysis containing the desired ML task
        mltask_id: the identifier of the desired ML task

    Returns:
        A :class:`dataikuapi.dss.ml.DSSMLTask` ML task handle
    """
    handle = DSSMLTask(self.client, self.project_key, analysis_id, mltask_id)
    return handle
237257

238258

259+
def create_analysis(self, input_dataset):
    """
    Create a new visual analysis lab for a dataset.

    Posts the dataset name as ``inputDataset`` to the project's ``lab/``
    endpoint and wraps the ``analysisId`` of the response in a handle.

    Args:
        input_dataset: the dataset the analysis is created for

    Returns:
        A :class:`dataikuapi.dss.analysis.DSSAnalysis` visual analysis handle
    """
    payload = {"inputDataset": input_dataset}
    endpoint = "/projects/%s/lab/" % self.project_key
    created = self.client._perform_json("POST", endpoint, body=payload)
    return DSSAnalysis(self.client, self.project_key, created["analysisId"])
271+
272+
def list_analyses(self):
    """
    Fetch the summaries of the visual analyses of this project.

    Issues a GET on the project's ``lab/`` endpoint.

    Returns:
        the decoded JSON response: the list of visual analysis summaries,
        each one as a JSON object
    """
    endpoint = "/projects/%s/lab/" % self.project_key
    return self.client._perform_json("GET", endpoint)
280+
281+
def get_analysis(self, analysis_id):
    """
    Build a handle on one visual analysis of this project.

    Only constructs the handle object from the given identifier.

    Args:
        analysis_id: the identifier of the desired visual analysis

    Returns:
        A :class:`dataikuapi.dss.analysis.DSSAnalysis` visual analysis handle
    """
    handle = DSSAnalysis(self.client, self.project_key, analysis_id)
    return handle
292+
239293
########################################################
240294
# Saved models
241295
########################################################

0 commit comments

Comments
 (0)