Skip to content

Commit 93299ff

Browse files
authored
Refactoring the MES public API (#165)
* Refactoring the MES public API. Removing file manipulation functions. Replacing create_model_evaluation with build. Removing DSSModelEvaluationSettings. Adding DSSModelEvaluationFullInfo, which provides accessors to the main components of an ME (metrics, evaluated model info, etc.) * Taking into account review remarks. * Removing as_type parameter, always returning list of instances of class. * Adding creation date to DSSModelEvaluation (makes sort quicker and easier in tests). * Adding a `get_sample_df` method to the public API, to retrieve the evaluation sample as a pandas DataFrame, without giving open access to any file of the evaluation. * Adding get_raw method to ComputedMetrics, for consistency's sake. * Putting back required import. * Handling empty store case. * Remove creation date from DSSModelEvaluation
1 parent 662d64e commit 93299ff

File tree

3 files changed

+117
-106
lines changed

3 files changed

+117
-106
lines changed

dataikuapi/dss/metrics.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,9 @@ class ComputedMetrics(object):
44
def __init__(self, raw):
55
self.raw = raw
66

7+
def get_raw(self):
8+
return self.raw
9+
710
def get_metric_by_id(self, id):
811
all_ids = []
912
for metric in self.raw["metrics"]:

dataikuapi/dss/modelevaluationstore.py

Lines changed: 111 additions & 100 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,7 @@
11
import json
2+
from io import BytesIO
3+
4+
import pandas as pd
25

36
from dataikuapi.dss.metrics import ComputedMetrics
47
from .discussion import DSSObjectDiscussions
@@ -10,6 +13,7 @@
1013
except NameError:
1114
basestring = str
1215

16+
1317
class DSSModelEvaluationStore(object):
1418
"""
1519
A handle to interact with a model evaluation store on the DSS instance.
@@ -21,7 +25,7 @@ def __init__(self, client, project_key, mes_id):
2125
self.project = client.get_project(project_key)
2226
self.project_key = project_key
2327
self.mes_id = mes_id
24-
28+
2529
@property
2630
def id(self):
2731
return self.mes_id
@@ -36,7 +40,7 @@ def get_settings(self):
3640
"GET", "/projects/%s/modelevaluationstores/%s" % (self.project_key, self.mes_id))
3741
return DSSModelEvaluationStoreSettings(self, data)
3842

39-
43+
4044
########################################################
4145
# Misc
4246
########################################################
@@ -110,32 +114,45 @@ def delete(self):
110114

111115

112116
########################################################
113-
# Runs
117+
# Model evaluations
114118
########################################################
115119

116-
def list_model_evaluations(self, as_type=None):
120+
def list_model_evaluations(self):
117121
"""
118-
List the model evaluations in this model evaluation store.
122+
List the model evaluations in this model evaluation store. The list is sorted
123+
by ME creation date.
119124
120125
:returns: The list of the model evaluations
121-
:rtype: list
126+
:rtype: list of :class:`dataikuapi.dss.modelevaluationstore.DSSModelEvaluation`
122127
"""
123128
items = self.client._perform_json("GET", "/projects/%s/modelevaluationstores/%s/runs/" % (self.project_key, self.mes_id))
124-
if as_type in ["objects", "object"]:
125-
return [DSSModelEvaluation(self, item["ref"]["runId"]) for item in items]
126-
else:
127-
return items
129+
return [DSSModelEvaluation(self, item["ref"]["runId"]) for item in items]
128130

129131
def get_model_evaluation(self, run_id):
130132
"""
131-
Get a handle to interact with a specific model evaluations
133+
Get a handle to interact with a specific model evaluation
132134
133135
:param string run_id: the id of the desired model evaluation
134136
135137
:returns: A :class:`dataikuapi.dss.modelevaluationstore.DSSModelEvaluation` model evaluation handle
136138
"""
137139
return DSSModelEvaluation(self, run_id)
138140

141+
def get_latest_model_evaluation(self):
142+
"""
143+
Get a handle to interact with the latest model evaluation computed
144+
145+
146+
:returns: A :class:`dataikuapi.dss.modelevaluationstore.DSSModelEvaluation` model evaluation handle
147+
if the store is not empty, else None
148+
"""
149+
150+
latest_run_id = self.client._perform_text(
151+
"GET", "/projects/%s/modelevaluationstores/%s/latestRunId" % (self.project_key, self.mes_id))
152+
if not latest_run_id:
153+
return None
154+
return DSSModelEvaluation(self, latest_run_id)
155+
139156
def delete_model_evaluations(self, evaluations):
140157
"""
141158
Remove model evaluations from this store
@@ -148,30 +165,31 @@ def delete_model_evaluations(self, evaluations):
148165
obj.append(evaluation['run_id'])
149166
else:
150167
obj.append(evaluation)
151-
self.model_evaluation_store.client._perform_json(
168+
self.client._perform_json(
152169
"DELETE", "/projects/%s/modelevaluationstores/%s/runs/" % (self.project_key, self.mes_id, self.run_id), body=obj)
153170

154-
155-
def create_model_evaluation(self, labels=None, prediction_type=None, model_type=None, model_params=None, data_type=None, data_params=None, metric_params=None, active_classifier_threshold=None):
171+
def build(self, job_type="NON_RECURSIVE_FORCED_BUILD", wait=True, no_fail=False):
156172
"""
157-
Create a new model evaluation in the model evaluation store, and return a handle to interact with it.
158-
159-
:returns: A :class:`dataikuapi.dss.modelevaluationstore.DSSModelEvaluation` model evaluation handle
173+
Starts a new job to build this Model Evaluation Store and, when ``wait`` is True, waits for it to complete.
174+
Raises if the job failed.
175+
176+
.. code-block:: python
177+
178+
job = mes.build()
179+
print("Job %s done" % job.id)
180+
181+
:param job_type: The job type. One of RECURSIVE_BUILD, NON_RECURSIVE_FORCED_BUILD or RECURSIVE_FORCED_BUILD
182+
:param wait: wait for the build to finish before returning
183+
:param no_fail: if True, does not raise if the job failed. Valid only when wait is True
184+
:return: the :class:`dataikuapi.dss.job.DSSJob` job handle corresponding to the built job
185+
:rtype: :class:`dataikuapi.dss.job.DSSJob`
160186
"""
161-
obj = {
162-
"labels": labels,
163-
"modelType": model_type,
164-
"modelParams": model_params if model_params is not None else {},
165-
"dataType": data_type,
166-
"dataParams": data_params if data_params is not None else {},
167-
"predictionType": prediction_type,
168-
"activeClassifierThreshold": active_classifier_threshold,
169-
"metricParams": metric_params if metric_params is not None else {}
170-
}
171-
res = self.client._perform_json("POST", "/projects/%s/modelevaluationstores/%s/runs/" % (self.project_key, self.mes_id),
172-
body = obj)
173-
run_id = res['id']
174-
return DSSModelEvaluation(self, run_id)
187+
jd = self.project.new_job(job_type)
188+
jd.with_output(self.mes_id, object_type="MODEL_EVALUATION_STORE")
189+
if wait:
190+
return jd.start_and_wait(no_fail)
191+
else:
192+
return jd.start(allowFail=not no_fail)
175193

176194

177195
########################################################
@@ -188,7 +206,6 @@ def get_last_metric_values(self):
188206
return ComputedMetrics(self.client._perform_json(
189207
"GET", "/projects/%s/modelevaluationstores/%s/metrics/last" % (self.project_key, self.mes_id)))
190208

191-
192209
def get_metric_history(self, metric):
193210
"""
194211
Get the history of the values of the metric on this model evaluation store
@@ -220,7 +237,6 @@ def compute_metrics(self, metric_ids=None, probes=None):
220237
"POST" , "%s/computeMetrics" % url)
221238

222239

223-
224240
class DSSModelEvaluationStoreSettings:
225241
"""
226242
A handle on the settings of a model evaluation store
@@ -240,6 +256,7 @@ def save(self):
240256
"PUT", "/projects/%s/modelevaluationstores/%s" % (self.model_evaluation_store.project_key, self.model_evaluation_store.mes_id),
241257
body=self.settings)
242258

259+
243260
class DSSModelEvaluation:
244261
"""
245262
A handle on a model evaluation
@@ -259,16 +276,9 @@ def get_full_info(self):
259276
"""
260277
Retrieve the model evaluation with its performance data
261278
"""
262-
return self.client._perform_json(
263-
"GET", "/projects/%s/modelevaluationstores/%s/runs/%s" % (self.project_key, self.mes_id, self.run_id))
264-
265-
def get_settings(self):
266-
"""
267-
Set the definition of this model evaluation
268-
"""
269279
data = self.client._perform_json(
270-
"GET", "/projects/%s/modelevaluationstores/%s/runs/%s/settings" % (self.project_key, self.mes_id, self.run_id))
271-
return DSSModelEvaluationSettings(self, data)
280+
"GET", "/projects/%s/modelevaluationstores/%s/runs/%s" % (self.project_key, self.mes_id, self.run_id))
281+
return DSSModelEvaluationFullInfo(self, data)
272282

273283
def delete(self):
274284
"""
@@ -278,80 +288,81 @@ def delete(self):
278288
self.client._perform_json(
279289
"DELETE", "/projects/%s/modelevaluationstores/%s/runs/" % (self.project_key, self.mes_id), body=obj)
280290

281-
########################################################
282-
# Model evaluation contents
283-
########################################################
284-
285-
def list_contents(self):
291+
def get_metrics(self):
286292
"""
287-
Get the list of files in the model evaluation
288-
289-
Returns:
290-
the list of files, as a JSON object
293+
Get the metrics for this model evaluation. Metrics must be understood here as Metrics in DSS Metrics & Checks
294+
295+
:return: the metrics, as a JSON object
291296
"""
292297
return self.client._perform_json(
293-
"GET", "/projects/%s/modelevaluationstores/%s/runs/%s/contents" % (self.project_key, self.mes_id, self.run_id))
298+
"GET", "/projects/%s/modelevaluationstores/%s/runs/%s/metrics" % (self.project_key, self.mes_id, self.run_id))
294299

295-
def get_file(self, path):
300+
def get_sample_df(self):
296301
"""
297-
Get a file from the model evaluation
298-
299-
Returns:
300-
the file's content, as a stream
301-
"""
302-
return self.client._perform_raw(
303-
"GET", "/projects/%s/modelevaluationstores/%s/runs/%s/contents/%s" % (self.project_key, self.mes_id, self.run_id, utils.quote(path)))
302+
Get the sample of the evaluation dataset on which the evaluation was performed
304303
305-
def delete_file(self, path):
306-
"""
307-
Delete a file from the model evaluation
304+
:return:
305+
the sample content, as a :class:`pandas.DataFrame`
308306
"""
309-
return self.client._perform_empty(
310-
"DELETE", "/projects/%s/modelevaluationstores/%s/runs/%s/contents/%s" % (self.project_key, self.mes_id, self.run_id, utils.quote(path)))
307+
buf = BytesIO()
308+
with self.client._perform_raw(
309+
"GET",
310+
"/projects/%s/modelevaluationstores/%s/runs/%s/sample" % (self.project_key, self.mes_id, self.run_id)
311+
).raw as f:
312+
buf.write(f.read())
313+
schema_txt = self.client._perform_raw(
314+
"GET",
315+
"/projects/%s/modelevaluationstores/%s/runs/%s/schema" % (self.project_key, self.mes_id, self.run_id)
316+
).text
317+
schema = json.loads(schema_txt)
318+
return pd.read_csv(BytesIO(buf.getvalue()), compression='gzip', sep='\t', header=None, names=[c["name"] for c in schema["columns"]])
311319

312-
def put_file(self, path, f):
313-
"""
314-
Upload the file to the model evaluation
315-
316-
Args:
317-
f: the file contents, as a stream
318-
path: the path of the file
319-
"""
320-
return self.client._perform_json_upload(
321-
"POST", "/projects/%s/modelevaluationstores/%s/runs/%s/contents/%s" % (self.project_key, self.mes_id, self.run_id, utils.quote(path)),
322-
"", f)
320+
321+
class DSSModelEvaluationFullInfo:
322+
"""
323+
A handle on the full information on a model evaluation.
324+
325+
Includes information such as the full id of the evaluated model, the evaluation params,
326+
the performance and drift metrics, if any, etc.
327+
328+
Do not create this class directly, instead use :meth:`dataikuapi.dss.DSSModelEvaluation.get_full_info`
329+
"""
330+
def __init__(self, model_evaluation, full_info):
331+
self.model_evaluation = model_evaluation
332+
self.full_info = full_info
333+
334+
def get_raw(self):
335+
return self.full_info
323336

324337
def get_metrics(self):
325338
"""
326-
Get the metrics for this model evaluation
339+
Get the metrics evaluated, if any.
327340
328-
:return: the metrics, as a JSON object
341+
:return: a dict containing the performance and data drift metrics, if any
329342
"""
330-
return self.client._perform_json(
331-
"GET", "/projects/%s/modelevaluationstores/%s/runs/%s/metrics" % (self.project_key, self.mes_id, self.run_id))
332-
333-
class DSSModelEvaluationSettings:
334-
"""
335-
A handle on the settings of a model evaluation
343+
return self.full_info["metrics"]
336344

337-
Do not create this class directly, instead use :meth:`dataikuapi.dss.DSSModelEvaluation.get_settings`
338-
"""
345+
def get_labels(self):
346+
"""
347+
Get the labels of the Model Evaluation
339348
340-
def __init__(self, model_evaluation, settings):
341-
self.model_evaluation = model_evaluation
342-
self.settings = settings
343-
# unpack some fields
344-
self.client = model_evaluation.client
345-
self.run_id = model_evaluation.run_id
346-
self.project_key = model_evaluation.project_key
347-
self.mes_id = model_evaluation.mes_id
349+
:return: a dict containing the labels
350+
"""
351+
return self.full_info["evaluation"]["labels"]
348352

349-
def get_raw(self):
350-
return self.settings
353+
def get_evaluation_parameters(self):
354+
"""
355+
Get info on the evaluation parameters, most notably the evaluation metric (evaluationMetric field
356+
of the returned dict)
351357
352-
def save(self):
353-
return self.client._perform_json(
354-
"PUT", "/projects/%s/modelevaluationstores/%s/runs/%s/settings" % (self.project_key, self.mes_id, self.run_id),
355-
body=self.settings)
358+
:return: a dict
359+
"""
360+
return self.full_info["evaluation"]["metricParams"]
356361

362+
def get_creation_date(self):
363+
"""
364+
Return the date and time of the creation of the Model Evaluation
357365
366+
:return: the date and time, as an epoch
367+
"""
368+
return self.full_info["evaluation"]["created"]

dataikuapi/dss/project.py

Lines changed: 3 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -754,18 +754,15 @@ def create_managed_folder(self, name, folder_type=None, connection_name="filesys
754754
# Model evaluation stores
755755
########################################################
756756

757-
def list_model_evaluation_stores(self, as_type=None):
757+
def list_model_evaluation_stores(self):
758758
"""
759759
List the model evaluation stores in this project.
760760
761761
:returns: The list of the model evaluation stores
762-
:rtype: list
762+
:rtype: list of :class:`dataikuapi.dss.modelevaluationstore.DSSModelEvaluationStore`
763763
"""
764764
items = self.client._perform_json("GET", "/projects/%s/modelevaluationstores/" % self.project_key)
765-
if as_type == "objects" or as_type == "object":
766-
return [DSSModelEvaluationStore(self.client, self.project_key, item["id"]) for item in items]
767-
else:
768-
return items
765+
return [DSSModelEvaluationStore(self.client, self.project_key, item["id"]) for item in items]
769766

770767
def get_model_evaluation_store(self, mes_id):
771768
"""

0 commit comments

Comments
 (0)