Skip to content

Commit bc97339

Browse files
authored
Merge pull request #48 from dataiku/fterrazzoni/eda-public-api-client
Public API for manipulating statistics worksheets
2 parents 280c13e + 014f6d5 commit bc97339

File tree

3 files changed

+314
-3
lines changed

3 files changed

+314
-3
lines changed

dataikuapi/dss/dataset.py

Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
import json
55
from .metrics import ComputedMetrics
66
from .discussion import DSSObjectDiscussions
7+
from .statistics import DSSStatisticsWorksheet
78

89
class DSSDataset(object):
910
"""
@@ -204,6 +205,62 @@ def run_checks(self, partition='', checks=None):
204205
"POST" , "/projects/%s/datasets/%s/actions/runChecks" %(self.project_key, self.dataset_name),
205206
params={'partition':partition}, body=checks)
206207

208+
########################################################
209+
# Statistics worksheets
210+
########################################################
211+
212+
def list_statistics_worksheets(self, as_objects=True):
    """
    List the statistics worksheets associated to this dataset.

    :param bool as_objects: if True (the default), each listed worksheet is
        turned into a handle through :meth:`get_statistics_worksheet`;
        otherwise the JSON listing returned by the API is returned as is

    :returns: the worksheets of this dataset
    :rtype: list of :class:`dataikuapi.dss.statistics.DSSStatisticsWorksheet`
        if `as_objects` is True, else the raw listing
    """
    worksheets = self.client._perform_json(
        "GET", "/projects/%s/datasets/%s/statistics/worksheets/" % (self.project_key, self.dataset_name))
    if not as_objects:
        return worksheets
    # Only the 'id' of each listed item is needed to build a handle
    return [self.get_statistics_worksheet(worksheet['id']) for worksheet in worksheets]
224+
225+
def create_statistics_worksheet(self, name="My worksheet"):
    """
    Create a new worksheet in the dataset, and return a handle to interact with it.

    The worksheet is created on this dataset with a fixed initial data
    selection (all partitions, "FULL" sampling method, 30000 max records).

    :param string name: name of the worksheet

    :returns: A :class:`dataikuapi.dss.statistics.DSSStatisticsWorksheet` worksheet handle
    """
    worksheet_definition = {
        "projectKey": self.project_key,
        "name": name,
        "dataSpec": {
            "inputDatasetSmartName": self.dataset_name,
            "datasetSelection": {
                "partitionSelectionMethod": "ALL",
                "maxRecords": 30000,
                "samplingMethod": "FULL"
            }
        }
    }
    created_worksheet = self.client._perform_json(
        "POST", "/projects/%s/datasets/%s/statistics/worksheets/" % (self.project_key, self.dataset_name),
        body=worksheet_definition
    )
    # The response carries the id assigned to the new worksheet
    return self.get_statistics_worksheet(created_worksheet['id'])
253+
254+
def get_statistics_worksheet(self, worksheet_id):
    """
    Get a handle to interact with a statistics worksheet.

    The handle is built locally from this dataset's client, project key and
    dataset name; this method itself performs no API call.

    :param string worksheet_id: the ID of the desired worksheet

    :returns: A :class:`dataikuapi.dss.statistics.DSSStatisticsWorksheet` worksheet handle
    """
    return DSSStatisticsWorksheet(self.client, self.project_key, self.dataset_name, worksheet_id)
263+
207264
########################################################
208265
# Metrics
209266
########################################################

dataikuapi/dss/future.py

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4,11 +4,12 @@ class DSSFuture(object):
44
"""
55
A future on the DSS instance
66
"""
7-
def __init__(self, client, job_id, state=None):
7+
def __init__(self, client, job_id, state=None, result_wrapper=lambda result: result):
88
self.client = client
99
self.job_id = job_id
1010
self.state = state
1111
self.state_is_peek = True
12+
self.result_wrapper = result_wrapper
1213

1314
@classmethod
1415
def get_result_wait_if_needed(cls, client, ret):
@@ -50,7 +51,7 @@ def get_result(self):
5051
if self.state is None or not self.state.get('hasResult', False) or self.state_is_peek:
5152
self.get_state()
5253
if self.state.get('hasResult', False):
53-
return self.state.get('result', None)
54+
return self.result_wrapper(self.state.get('result', None))
5455
else:
5556
raise Exception("Result not ready")
5657

@@ -72,7 +73,7 @@ def wait_for_result(self):
7273
time.sleep(5)
7374
self.get_state()
7475
if self.state.get('hasResult', False):
75-
return self.state.get('result', None)
76+
return self.result_wrapper(self.state.get('result', None))
7677
else:
7778
raise Exception("No result")
7879

dataikuapi/dss/statistics.py

Lines changed: 253 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,253 @@
1+
from ..utils import DataikuException
2+
from .utils import DSSDatasetSelectionBuilder
3+
from .future import DSSFuture
4+
import json
5+
from .metrics import ComputedMetrics
6+
from .discussion import DSSObjectDiscussions
7+
8+
9+
class DSSStatisticsWorksheet(object):
    """
    A handle to interact with a worksheet.
    """

    def __init__(self, client, project_key, dataset_name, worksheet_id):
        self.client = client
        self.project_key = project_key
        self.dataset_name = dataset_name
        self.worksheet_id = worksheet_id

    def delete(self):
        """
        Deletes the worksheet
        """
        return self.client._perform_empty(
            "DELETE", "/projects/%s/datasets/%s/statistics/worksheets/%s" % (
                self.project_key, self.dataset_name, self.worksheet_id))

    def get_settings(self):
        """
        Fetches the settings of this worksheet.

        :return: an object to interact with the settings
        :rtype: :class:`DSSStatisticsWorksheetSettings`
        """
        worksheet_json = self.client._perform_json(
            "GET", "/projects/%s/datasets/%s/statistics/worksheets/%s" % (
                self.project_key, self.dataset_name, self.worksheet_id)
        )
        return DSSStatisticsWorksheetSettings(self.client, self.project_key,
                                              self.dataset_name, self.worksheet_id, worksheet_json)

    def run_worksheet(self, wait=True):
        """
        Computes the results of the whole worksheet.

        The worksheet settings are fetched first, then their root card is run.

        :param bool wait: if True, block until the result is available
        :returns: a :class:`DSSStatisticsCardResult` if `wait` is `True`, or a :class:`~dataikuapi.dss.future.DSSFuture` handle otherwise
        """
        root_card = self.get_settings().get_raw()['rootCard']
        return self.run_card(root_card, wait=wait)

    def run_card(self, card, wait=True):
        """
        Runs a card in the context of the worksheet.

        Note: the card does not need to belong to the worksheet.

        :param card: a card to compute
        :type card: :class:`DSSStatisticsCardSettings` or dict (obtained from ``DSSStatisticsCardSettings.get_raw()``)
        :param bool wait: if True, block until the result is available
        :returns: a :class:`DSSStatisticsCardResult` if `wait` is `True`, or a :class:`~dataikuapi.dss.future.DSSFuture` handle otherwise
        """
        card = DSSStatisticsCardSettings._from_card_or_dict(self.client, card)
        future_response = self.client._perform_json(
            "POST",
            "/projects/%s/datasets/%s/statistics/worksheets/%s/actions/run-card" % (
                self.project_key, self.dataset_name, self.worksheet_id),
            body=card.get_raw()
        )

        # The future wraps its raw result into a DSSStatisticsCardResult
        future = DSSFuture(self.client, future_response.get("jobId", None), future_response,
                           result_wrapper=DSSStatisticsCardResult)

        return future.wait_for_result() if wait else future

    def run_computation(self, computation, wait=True):
        """
        Runs a computation in the context of the worksheet.

        :param computation: a computation to run
        :type computation: :class:`DSSStatisticsComputationSettings` or dict (obtained from ``DSSStatisticsComputationSettings.get_raw()``)
        :param bool wait: if True, block until the result is available
        :returns: a :class:`DSSStatisticsComputationResult` if `wait` is `True`, or a :class:`~dataikuapi.dss.future.DSSFuture` handle otherwise
        """
        computation = DSSStatisticsComputationSettings._from_computation_or_dict(
            computation)
        future_response = self.client._perform_json(
            "POST",
            "/projects/%s/datasets/%s/statistics/worksheets/%s/actions/run-computation" % (
                self.project_key, self.dataset_name, self.worksheet_id),
            body=computation.get_raw()
        )

        # The future wraps its raw result into a DSSStatisticsComputationResult
        future = DSSFuture(self.client, future_response.get("jobId", None), future_response,
                           result_wrapper=DSSStatisticsComputationResult)

        return future.wait_for_result() if wait else future
97+
98+
99+
class DSSStatisticsWorksheetSettings(object):
    """
    Object to manipulate the settings of a worksheet.

    Changes are made on the in-memory definition and are only sent to DSS
    when :meth:`save` is called.
    """

    def __init__(self, client, project_key, dataset_name, worksheet_id, worksheet_definition):
        self._worksheet_definition = worksheet_definition
        self.client = client
        self.project_key = project_key
        self.dataset_name = dataset_name
        self.worksheet_id = worksheet_id

    def add_card(self, card):
        """
        Adds a new card to the worksheet.

        The card is appended to the cards of the worksheet's root card.

        :param card: card to be added
        :type card: :class:`DSSStatisticsCardSettings` or dict (obtained from ``DSSStatisticsCardSettings.get_raw()``)
        """
        card = DSSStatisticsCardSettings._from_card_or_dict(self.client, card)
        self._worksheet_definition['rootCard']['cards'].append(card.get_raw())

    def list_cards(self):
        """
        Lists the cards of this worksheet.

        :rtype: list of :class:`DSSStatisticsCardSettings`
        """
        return [DSSStatisticsCardSettings(self.client, card_definition)
                for card_definition in self._worksheet_definition['rootCard']['cards']]

    def get_raw(self):
        """
        Gets a reference to the raw settings of the worksheet.

        :rtype: dict
        """
        return self._worksheet_definition

    def set_sampling_settings(self, selection):
        """
        Sets the sampling settings of the worksheet

        :param selection: the new sampling settings
        :type selection: :class:`DSSDatasetSelectionBuilder` or dict (obtained from ``get_raw_sampling_settings()``)
        """
        # A builder is converted with build(); anything else is stored as is
        raw_selection = selection.build() if isinstance(
            selection, DSSDatasetSelectionBuilder) else selection
        self._worksheet_definition['dataSpec']['datasetSelection'] = raw_selection

    def get_raw_sampling_settings(self):
        """
        Gets a reference to the raw sampling settings of the worksheet.

        :rtype: dict
        """
        return self._worksheet_definition['dataSpec']['datasetSelection']

    def save(self):
        """
        Saves the settings to DSS

        The in-memory definition is replaced by the definition returned by
        the API call.
        """
        self._worksheet_definition = self.client._perform_json(
            "PUT",
            "/projects/%s/datasets/%s/statistics/worksheets/%s" % (
                self.project_key, self.dataset_name, self.worksheet_id),
            body=self._worksheet_definition
        )
162+
163+
164+
class DSSStatisticsCardSettings(object):
    """
    Object to manipulate the settings of a card
    """

    def __init__(self, client, card_definition):
        self.client = client
        self._card_definition = card_definition

    def get_raw(self):
        """
        Gets a reference to the raw settings of the card.

        :rtype: dict
        """
        return self._card_definition

    def compile(self):
        """
        Gets the underlying computation used to compute the card results.

        The card definition is posted to the API, and the returned JSON is
        wrapped in a computation settings object.

        :rtype: DSSStatisticsComputationSettings
        """
        computation_json = self.client._perform_json(
            "POST", "/statistics/cards/compile", body=self._card_definition
        )
        return DSSStatisticsComputationSettings(computation_json)

    @staticmethod
    def _from_card_or_dict(client, card_or_dict):
        """
        Builds a card settings object from either a card settings object or a raw dict.

        A new object bound to `client` is always returned; when given an
        existing card settings object, its raw definition is reused, not copied.
        """
        if isinstance(card_or_dict, DSSStatisticsCardSettings):
            card_or_dict = card_or_dict.get_raw()
        return DSSStatisticsCardSettings(client, card_or_dict)
197+
198+
199+
class DSSStatisticsCardResult(object):
    """
    Object storing the results of a :class:`DSSStatisticsCardSettings`
    """

    def __init__(self, card_result):
        self._card_result = card_result

    def get_raw(self):
        """
        Gets a reference to the raw results of the card

        :rtype: dict
        """
        return self._card_result
214+
215+
216+
class DSSStatisticsComputationSettings(object):
    """
    Object to manipulate the settings of a computation
    """

    def __init__(self, computation_definition):
        self._computation_definition = computation_definition

    def get_raw(self):
        """
        Gets the raw settings of the computation.

        :rtype: dict
        """
        return self._computation_definition

    @staticmethod
    def _from_computation_or_dict(computation_or_dict):
        """
        Builds a computation settings object from either a computation settings object or a raw dict.

        A new object is always returned; when given an existing computation
        settings object, its raw definition is reused, not copied.
        """
        if isinstance(computation_or_dict, DSSStatisticsComputationSettings):
            computation_or_dict = computation_or_dict.get_raw()
        return DSSStatisticsComputationSettings(computation_or_dict)
237+
238+
239+
class DSSStatisticsComputationResult(object):
    """
    Object storing the results of a :class:`DSSStatisticsComputationSettings`
    """

    def __init__(self, computation_result):
        self._computation_result = computation_result

    def get_raw(self):
        """
        Gets a reference to the raw results of the computation

        :rtype: dict
        """
        return self._computation_result

0 commit comments

Comments
 (0)