Skip to content

Commit e3192b1

Browse files
[sc-71083] Scratching itches on the Model Evaluation Stores API (#180)
* Rename get_full_info to get_evaluation_full_info Add getters for properties of DSSModelEvaluationFullInfo Document data drift parameters and results * Rollback renaming of get_full_info to get_evaluation_full_info * Add consistency to naming styles in the documentation * Rename ColumnReport to ColumnSettings * Change some getters to properties for primitive types Return raw data for get_raw in UnivariateDriftResult and add a property for getting per column data drift info. * Taking into account PR review * Removing unimportant helpers * Use a list comprehension instead of a `list(map())` for wider compatibility * Remove _sample_size attributes * Fix parameter naming & doc * Better doc for `enabled` param of `PerColumnDriftParamBuilder.with_column_drift_param`
1 parent 2e26e09 commit e3192b1

File tree

1 file changed

+178
-23
lines changed

1 file changed

+178
-23
lines changed

dataikuapi/dss/modelevaluationstore.py

Lines changed: 178 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -5,8 +5,6 @@
55
from .discussion import DSSObjectDiscussions
66
from .future import DSSFuture
77

8-
from requests import utils
9-
108
try:
119
basestring
1210
except NameError:
@@ -169,7 +167,7 @@ def delete_model_evaluations(self, evaluations):
169167

170168
def build(self, job_type="NON_RECURSIVE_FORCED_BUILD", wait=True, no_fail=False):
171169
"""
172-
Starts a new job to build this Model Evaluation Store and wait for it to complete.
170+
Starts a new job to build this model evaluation store and wait for it to complete.
173171
Raises if the job failed.
174172
175173
.. code-block:: python
@@ -274,6 +272,8 @@ def __init__(self, model_evaluation_store, evaluation_id):
274272
def get_full_info(self):
275273
"""
276274
Retrieve the model evaluation with its performance data
275+
276+
:return: the model evaluation full info, as a :class:`dataikuapi.dss.modelevaluationstore.DSSModelEvaluationFullInfo`
277277
"""
278278
data = self.client._perform_json(
279279
"GET", "/projects/%s/modelevaluationstores/%s/evaluations/%s" % (self.project_key, self.mes_id, self.evaluation_id))
@@ -301,21 +301,25 @@ def compute_data_drift(self, reference=None, data_drift_params=None, wait=True):
301301
:param reference: saved model version (full ID or DSSTrainedPredictionModelDetails)
302302
or model evaluation (full ID or DSSModelEvaluation) to use as reference (optional)
303303
:type reference: Union[str, DSSModelEvaluation, DSSTrainedPredictionModelDetails]
304-
:param data_drift_params: data drift computation settings (optional)
304+
:param data_drift_params: data drift computation settings as a :class:`dataikuapi.dss.modelevaluationstore.DataDriftParams` (optional)
305+
:type data_drift_params: DataDriftParams
305306
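:param wait: if `True`, wait for the data drift computation to complete and return its result; if `False`, return a future handle without waiting (optional, default: `True`)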
306-
:returns: a `dict` containing data drift analysis results if `wait` is `True`, or a :class:`~dataikuapi.dss.future.DSSFuture` handle otherwise
307+
:returns: a :class:`dataikuapi.dss.modelevaluationstore.DataDriftResult` containing data drift analysis results if `wait` is `True`, or a :class:`~dataikuapi.dss.future.DSSFuture` handle otherwise
307308
"""
308309

309310
if hasattr(reference, 'full_id'):
310311
reference = reference.full_id
311312

313+
if data_drift_params:
314+
data_drift_params = data_drift_params.data
315+
312316
future_response = self.client._perform_json(
313317
"POST", "/projects/%s/modelevaluationstores/%s/evaluations/%s/computeDataDrift" % (self.project_key, self.mes_id, self.evaluation_id),
314318
body={
315319
"referenceId": reference,
316320
"dataDriftParams": data_drift_params
317321
})
318-
future = DSSFuture(self.client, future_response.get('jobId', None), future_response)
322+
future = DSSFuture(self.client, future_response.get('jobId', None), future_response, result_wrapper=DataDriftResult)
319323
return future.wait_for_result() if wait else future
320324

321325
def get_metrics(self):
@@ -361,39 +365,190 @@ class DSSModelEvaluationFullInfo:
361365
def __init__(self, model_evaluation, full_info):
362366
self.model_evaluation = model_evaluation
363367
self.full_info = full_info
368+
self.metrics = self.full_info["metrics"] # type: dict
369+
"""The performance and data drift metric, if any."""
370+
self.creation_date = self.full_info["evaluation"]["created"] # type: int
371+
"""The date and time of the creation of the model evaluation, as an epoch."""
372+
self.full_id = self.full_info["evaluation"]["ref"]["fullId"] # type: str
373+
self.model_full_id = self.full_info["evaluation"]["modelRef"]["fullId"] # type: str
374+
self.prediction_type = self.full_info["evaluation"]["predictionType"] # type: str
375+
self.prediction_variable = self.full_info["evaluation"]["predictionVariable"] # type: str
376+
self.target_variable = self.full_info["evaluation"]["targetVariable"] # type: str
377+
self.user_meta = self.full_info["evaluation"]["userMeta"] # type: dict
378+
"""The user-accessible metadata (name, labels)
379+
Returns the original object, not a copy. Changes to the returned object are persisted to DSS by calling :meth:`save_user_meta`."""
364380

365381
def get_raw(self):
366382
return self.full_info
367383

368-
def get_metrics(self):
384+
def save_user_meta(self):
385+
return self.model_evaluation.client._perform_text(
386+
"PUT", "/projects/%s/modelevaluationstores/%s/evaluations/%s/user-meta" %
387+
(self.model_evaluation.project_key, self.model_evaluation.mes_id, self.model_evaluation.evaluation_id), body=self.user_meta)
388+
389+
390+
class DataDriftParams(object):
391+
"""
392+
Object that represents parameters for data drift computation.
393+
Do not create this object directly, use :meth:`dataikuapi.dss.modelevaluationstore.DataDriftParams.from_params` instead.
394+
"""
395+
def __init__(self, data):
396+
self.data = data
397+
398+
def __repr__(self):
399+
return u"{}({})".format(self.__class__.__name__, self.data)
400+
401+
@staticmethod
402+
def from_params(per_column_settings, nb_bins=10, compute_histograms=True, confidence_level=0.95):
369403
"""
370-
Get the metrics evaluated, if any.
404+
Creates parameters for data drift computation from the per-column settings, the number of histogram bins, whether to compute histograms, and the confidence level
405+
406+
:param dict per_column_settings: A dict representing the per column settings.
407+
You should use a :class:`~dataikuapi.dss.modelevaluationstore.PerColumnDriftParamBuilder` to build it.
408+
:param int nb_bins: (optional) Nb. bins in histograms (apply to all columns) - default: 10
409+
:param bool compute_histograms: (optional) Enable/disable histograms - default: True
410+
:param float confidence_level: (optional) Confidence level used to compute the confidence interval on the drift model's accuracy - default: 0.95
371411
372-
:return: a dict containing the performance and data drift metric, if any
412+
:rtype: :class:`dataikuapi.dss.modelevaluationstore.DataDriftParams`
373413
"""
374-
return self.full_info["metrics"]
414+
return DataDriftParams({
415+
"columns": per_column_settings,
416+
"nbBins": nb_bins,
417+
"computeHistograms": compute_histograms,
418+
"confidenceLevel": confidence_level
419+
})
375420

376-
def get_labels(self):
421+
422+
class PerColumnDriftParamBuilder(object):
423+
"""
424+
Builder for a map of per column drift params settings.
425+
Used as a helper before computing data drift to build columns param expected in
426+
:meth:`dataikuapi.dss.modelevaluationstore.DataDriftParams.from_params`.
427+
"""
428+
def __init__(self):
429+
self.columns = {}
430+
431+
def build(self):
432+
"""Returns the built dict for per column drift params settings"""
433+
return self.columns
434+
435+
def with_column_drift_param(self, name, handling="AUTO", enabled=True):
377436
"""
378-
Get the labels of the Model Evaluation
437+
Sets the drift params settings for given column name.
379438
380-
:return: a dict containing the labels
439+
:param str name: The name of the column
440+
:param str handling: (optional) The column type, should be either NUMERICAL, CATEGORICAL or AUTO (default: AUTO)
441+
:param bool enabled: (optional) False means the column is ignored in drift computation (default: True)
381442
"""
382-
return self.full_info["evaluation"]["labels"]
443+
self.columns[name] = {
444+
"handling": handling,
445+
"enabled": enabled
446+
}
447+
return self
448+
449+
450+
class DataDriftResult(object):
451+
"""
452+
A handle on the data drift result of a model evaluation.
453+
454+
Do not create this class directly, instead use :meth:`dataikuapi.dss.modelevaluationstore.DSSModelEvaluation.compute_data_drift`
455+
"""
456+
def __init__(self, data):
457+
self.data = data
458+
self.drift_model_result = DriftModelResult(self.data["driftModelResult"])
459+
"""Drift analysis based on drift modeling."""
460+
self.univariate_drift_result = UnivariateDriftResult(self.data["univariateDriftResult"])
461+
"""Per-column drift analysis based on pairwise comparison of distributions."""
462+
self.per_column_settings = [ColumnSettings(cs) for cs in self.data["perColumnSettings"]]
463+
"""Information about column handling that has been used (errors, types, etc)."""
383464

384-
def get_evaluation_parameters(self):
465+
def get_raw(self):
466+
"""
467+
:return: the raw data drift result
468+
:rtype: dict
385469
"""
386-
Get info on the evaluation parameters, most noticeably the evaluation metric (evaluationMetric field
387-
of the returned dict)
470+
return self.data
471+
388472

389-
:return: a dict
473+
class DriftModelResult(object):
474+
"""
475+
A handle on the drift model result.
476+
477+
Do not create this class directly, instead use :attr:`dataikuapi.dss.modelevaluationstore.DataDriftResult.drift_model_result`
478+
"""
479+
def __init__(self, data):
480+
self.data = data
481+
self.drift_model_accuracy = DriftModelAccuracy(self.data["driftModelAccuracy"])
482+
self.feature_drift_importance = self.data["driftVersusImportance"] # type: dict
483+
484+
def get_raw(self):
390485
"""
391-
return self.full_info["evaluation"]["metricParams"]
486+
:return: the raw drift model result
487+
:rtype: dict
488+
"""
489+
return self.data
490+
491+
492+
class UnivariateDriftResult(object):
493+
"""
494+
A handle on the univariate data drift.
495+
496+
Do not create this class directly, instead use :attr:`dataikuapi.dss.modelevaluationstore.DataDriftResult.univariate_drift_result`
497+
"""
498+
def __init__(self, data):
499+
self.data = data
500+
self.per_column_drift_data = self.data["columns"] # type: dict
501+
"""Drift data per column, as a dict of column name -> drift data."""
502+
503+
def get_raw(self):
504+
"""
505+
:return: the raw univariate data drift
506+
:rtype: dict
507+
"""
508+
return self.data
509+
392510

393-
def get_creation_date(self):
511+
class ColumnSettings(object):
512+
"""
513+
A handle on column handling information.
514+
515+
Do not create this class directly, instead use :attr:`dataikuapi.dss.modelevaluationstore.DataDriftResult.per_column_settings`
516+
"""
517+
def __init__(self, data):
518+
self.data = data
519+
self.name = self.data["name"] # type: str
520+
self.actual_column_handling = self.data["actualHandling"] # type: str
521+
"""The actual column handling (either forced via drift params or inferred from model evaluation preprocessings).
522+
It can be any of NUMERICAL, CATEGORICAL, or IGNORED."""
523+
self.default_column_handling = self.data["defaultHandling"] # type: str
524+
"""The default column handling (based on model evaluation preprocessing only).
525+
It can be any of NUMERICAL, CATEGORICAL, or IGNORED."""
526+
self.error_message = self.data.get("errorMessage", None)
527+
528+
def get_raw(self):
529+
"""
530+
:return: the raw column handling information
531+
:rtype: dict
394532
"""
395-
Return the date and time of the creation of the Model Evaluation
533+
return self.data
534+
396535

397-
:return: the date and time, as an epoch
536+
class DriftModelAccuracy(object):
537+
"""
538+
A handle on the drift model accuracy.
539+
540+
Do not create this class directly, instead use :attr:`dataikuapi.dss.modelevaluationstore.DriftModelResult.drift_model_accuracy`
541+
"""
542+
def __init__(self, data):
543+
self.data = data
544+
self.value = self.data["value"] # type: float
545+
self.lower_confidence_interval = self.data["lower"] # type: float
546+
self.upper_confidence_interval = self.data["upper"] # type: float
547+
self.pvalue = self.data["pvalue"] # type: float
548+
549+
def get_raw(self):
550+
"""
551+
:return: the drift model accuracy data
552+
:rtype: dict
398553
"""
399-
return self.full_info["evaluation"]["created"]
554+
return self.data

0 commit comments

Comments
 (0)