55from .discussion import DSSObjectDiscussions
66from .future import DSSFuture
77
8- from requests import utils
9-
108try :
119 basestring
1210except NameError :
@@ -169,7 +167,7 @@ def delete_model_evaluations(self, evaluations):
169167
170168 def build (self , job_type = "NON_RECURSIVE_FORCED_BUILD" , wait = True , no_fail = False ):
171169 """
172- Starts a new job to build this Model Evaluation Store and wait for it to complete.
170+ Starts a new job to build this model evaluation store and wait for it to complete.
173171 Raises if the job failed.
174172
175173 .. code-block:: python
@@ -274,6 +272,8 @@ def __init__(self, model_evaluation_store, evaluation_id):
274272 def get_full_info (self ):
275273 """
276274 Retrieve the model evaluation with its performance data
275+
276+ :return: the model evaluation full info, as a :class:`dataikuapi.dss.DSSModelEvaluationInfo`
277277 """
278278 data = self .client ._perform_json (
279279 "GET" , "/projects/%s/modelevaluationstores/%s/evaluations/%s" % (self .project_key , self .mes_id , self .evaluation_id ))
@@ -301,21 +301,25 @@ def compute_data_drift(self, reference=None, data_drift_params=None, wait=True):
301301 :param reference: saved model version (full ID or DSSTrainedPredictionModelDetails)
302302 or model evaluation (full ID or DSSModelEvaluation) to use as reference (optional)
303303 :type reference: Union[str, DSSModelEvaluation, DSSTrainedPredictionModelDetails]
304- :param data_drift_params: data drift computation settings (optional)
304+ :param data_drift_params: data drift computation settings as a :class:`dataikuapi.dss.modelevaluationstore.DataDriftParams` (optional)
305+ :type data_drift_params: DataDriftParams
305306 :param wait: if True, wait for the data drift computation to complete and return its result; if False, return the future immediately (optional)
306- :returns: a `dict ` containing data drift analysis results if `wait` is `True`, or a :class:`~dataikuapi.dss.future.DSSFuture` handle otherwise
307+ :returns: a :class:`dataikuapi.dss.modelevaluationstore.DataDriftResult ` containing data drift analysis results if `wait` is `True`, or a :class:`~dataikuapi.dss.future.DSSFuture` handle otherwise
307308 """
308309
309310 if hasattr (reference , 'full_id' ):
310311 reference = reference .full_id
311312
313+ if data_drift_params :
314+ data_drift_params = data_drift_params .data
315+
312316 future_response = self .client ._perform_json (
313317 "POST" , "/projects/%s/modelevaluationstores/%s/evaluations/%s/computeDataDrift" % (self .project_key , self .mes_id , self .evaluation_id ),
314318 body = {
315319 "referenceId" : reference ,
316320 "dataDriftParams" : data_drift_params
317321 })
318- future = DSSFuture (self .client , future_response .get ('jobId' , None ), future_response )
322+ future = DSSFuture (self .client , future_response .get ('jobId' , None ), future_response , result_wrapper = DataDriftResult )
319323 return future .wait_for_result () if wait else future
320324
321325 def get_metrics (self ):
@@ -361,39 +365,190 @@ class DSSModelEvaluationFullInfo:
def __init__(self, model_evaluation, full_info):
    """
    Wrap the raw full info of a model evaluation and expose its main fields as attributes.

    :param model_evaluation: the model evaluation handle this info belongs to
    :param dict full_info: the raw full info; its ``metrics`` entry and the ``created``, ``ref``,
        ``modelRef``, ``predictionType``, ``predictionVariable``, ``targetVariable`` and ``userMeta``
        fields of its ``evaluation`` entry are read here
    :raises KeyError: if one of these fields is missing from ``full_info``
    """
    self.model_evaluation = model_evaluation
    self.full_info = full_info
    self.metrics = self.full_info["metrics"]  # type: dict
    """The performance and data drift metric, if any."""
    # All remaining attributes are read from the "evaluation" entry of the raw info
    evaluation = self.full_info["evaluation"]
    self.creation_date = evaluation["created"]  # type: int
    """The date and time of the creation of the model evaluation, as an epoch."""
    self.full_id = evaluation["ref"]["fullId"]  # type: str
    self.model_full_id = evaluation["modelRef"]["fullId"]  # type: str
    self.prediction_type = evaluation["predictionType"]  # type: str
    self.prediction_variable = evaluation["predictionVariable"]  # type: str
    self.target_variable = evaluation["targetVariable"]  # type: str
    self.user_meta = evaluation["userMeta"]  # type: dict
    """The user-accessible metadata (name, labels)
    Returns the original object, not a copy. Changes to the returned object are persisted to DSS by calling :meth:`save_user_meta`."""
def get_raw(self):
    """
    :return: the raw full info of the model evaluation, as passed to the constructor (not a copy)
    """
    return self.full_info
367383
368- def get_metrics (self ):
def save_user_meta(self):
    """
    Save the user-accessible metadata held in ``user_meta`` to DSS.

    Sends ``self.user_meta`` as the body of a ``PUT`` request on the ``user-meta``
    endpoint of the model evaluation.

    :return: the value returned by the client's ``_perform_text`` call
    """
    evaluation = self.model_evaluation
    return evaluation.client._perform_text(
        "PUT", "/projects/%s/modelevaluationstores/%s/evaluations/%s/user-meta" %
               (evaluation.project_key, evaluation.mes_id, evaluation.evaluation_id),
        body=self.user_meta)
388+
389+
class DataDriftParams(object):
    """
    Object that represents parameters for data drift computation.
    Do not create this object directly, use :meth:`dataikuapi.dss.modelevaluationstore.DataDriftParams.from_params` instead.
    """

    def __init__(self, data):
        """
        :param dict data: the raw data drift parameters, as built by :meth:`from_params`
        """
        self.data = data

    def __repr__(self):
        return u"{}({})".format(self.__class__.__name__, self.data)

    @classmethod
    def from_params(cls, per_column_settings, nb_bins=10, compute_histograms=True, confidence_level=0.95):
        """
        Creates parameters for data drift computation from columns, number of bins, compute histograms and confidence level

        :param dict per_column_settings: A dict representing the per column settings.
            You should use a :class:`~dataikuapi.dss.modelevaluationstore.PerColumnDriftParamBuilder` to build it.
        :param int nb_bins: (optional) Nb. bins in histograms (apply to all columns) - default: 10
        :param bool compute_histograms: (optional) Enable/disable histograms - default: True
        :param float confidence_level: (optional) Used to compute confidence interval on drift's model accuracy - default: 0.95

        :rtype: :class:`dataikuapi.dss.modelevaluationstore.DataDriftParams`
        """
        # Instantiate through cls so that subclasses get instances of their own type
        return cls({
            "columns": per_column_settings,
            "nbBins": nb_bins,
            "computeHistograms": compute_histograms,
            "confidenceLevel": confidence_level
        })
375420
376- def get_labels (self ):
421+
class PerColumnDriftParamBuilder(object):
    """
    Builder for a map of per column drift params settings.
    Used as a helper before computing data drift to build columns param expected in
    :meth:`dataikuapi.dss.modelevaluationstore.DataDriftParams.from_params`.
    """

    def __init__(self):
        self.columns = {}

    def build(self):
        """
        Returns the built dict for per column drift params settings

        The returned dict is a copy: further calls on this builder do not alter it.

        :rtype: dict
        """
        return {name: dict(settings) for name, settings in self.columns.items()}

    def with_column_drift_param(self, name, handling="AUTO", enabled=True):
        """
        Sets the drift params settings for given column name.

        :param str name: The name of the column
        :param str handling: (optional) The column type, should be either NUMERICAL, CATEGORICAL or AUTO (default: AUTO)
        :param bool enabled: (optional) False means the column is ignored in drift computation (default: True)
        :return: this builder, so that calls can be chained
        """
        self.columns[name] = {
            "handling": handling,
            "enabled": enabled
        }
        return self
448+
449+
class DataDriftResult(object):
    """
    A handle on the data drift result of a model evaluation.

    Do not create this class directly, instead use :meth:`dataikuapi.dss.DSSModelEvaluation.compute_data_drift`
    """

    def __init__(self, data):
        """
        :param dict data: the raw data drift result; its ``driftModelResult``, ``univariateDriftResult``
            and ``perColumnSettings`` entries are wrapped into dedicated objects
        :raises KeyError: if one of these entries is missing
        """
        self.data = data
        self.drift_model_result = DriftModelResult(self.data["driftModelResult"])
        """Drift analysis based on drift modeling."""
        self.univariate_drift_result = UnivariateDriftResult(self.data["univariateDriftResult"])
        """Per-column drift analysis based on pairwise comparison of distributions."""
        self.per_column_settings = [ColumnSettings(cs) for cs in self.data["perColumnSettings"]]
        """Information about column handling that has been used (errors, types, etc)."""

    def get_raw(self):
        """
        :return: the raw data drift result
        :rtype: dict
        """
        return self.data
471+
388472
389- :return: a dict
class DriftModelResult(object):
    """
    A handle on the drift model result.

    Do not create this class directly, instead use :attr:`dataikuapi.dss.modelevaluationstore.DataDriftResult.drift_model_result`
    """

    def __init__(self, data):
        """
        :param dict data: the raw drift model result; must contain the ``driftModelAccuracy``
            and ``driftVersusImportance`` entries
        :raises KeyError: if one of these entries is missing
        """
        self.data = data
        self.drift_model_accuracy = DriftModelAccuracy(self.data["driftModelAccuracy"])
        """The ``driftModelAccuracy`` entry of the raw result, wrapped in a :class:`DriftModelAccuracy`."""
        self.feature_drift_importance = self.data["driftVersusImportance"]  # type: dict
        """The ``driftVersusImportance`` entry of the raw result."""

    def get_raw(self):
        """
        :return: the raw drift model result
        :rtype: dict
        """
        return self.data
490+
491+
class UnivariateDriftResult(object):
    """
    A handle on the univariate data drift.

    Do not create this class directly, instead use :attr:`dataikuapi.dss.modelevaluationstore.DataDriftResult.univariate_drift_result`
    """

    def __init__(self, data):
        """
        :param dict data: the raw univariate data drift; must contain a ``columns`` entry
        :raises KeyError: if the ``columns`` entry is missing
        """
        self.data = data
        self.per_column_drift_data = self.data["columns"]  # type: dict
        """Drift data per column, as a dict of column name -> drift data."""

    def get_raw(self):
        """
        :return: the raw univariate data drift
        :rtype: dict
        """
        return self.data
509+
392510
393- def get_creation_date (self ):
class ColumnSettings(object):
    """
    A handle on column handling information.

    Do not create this class directly, instead use :attr:`dataikuapi.dss.modelevaluationstore.DataDriftResult.per_column_settings`
    """

    def __init__(self, data):
        """
        :param dict data: the raw column handling information; must contain the ``name``,
            ``actualHandling`` and ``defaultHandling`` entries, ``errorMessage`` is optional
        :raises KeyError: if one of the mandatory entries is missing
        """
        self.data = data
        self.name = self.data["name"]  # type: str
        self.actual_column_handling = self.data["actualHandling"]  # type: str
        """The actual column handling (either forced via drift params or inferred from model evaluation preprocessings).
        It can be any of NUMERICAL, CATEGORICAL, or IGNORED."""
        self.default_column_handling = self.data["defaultHandling"]  # type: str
        """The default column handling (based on model evaluation preprocessing only).
        It can be any of NUMERICAL, CATEGORICAL, or IGNORED."""
        # Unlike the other fields, the error message may be absent from the raw data
        self.error_message = self.data.get("errorMessage", None)

    def get_raw(self):
        """
        :return: the raw column handling information
        :rtype: dict
        """
        return self.data
534+
396535
397- :return: the date and time, as an epoch
class DriftModelAccuracy(object):
    """
    A handle on the drift model accuracy.

    Do not create this class directly, instead use :attr:`dataikuapi.dss.modelevaluationstore.DriftModelResult.drift_model_accuracy`
    """

    def __init__(self, data):
        """
        :param dict data: the raw drift model accuracy; must contain the ``value``, ``lower``,
            ``upper`` and ``pvalue`` entries
        :raises KeyError: if one of these entries is missing
        """
        self.data = data
        self.value = self.data["value"]  # type: float
        """The ``value`` entry of the raw data."""
        self.lower_confidence_interval = self.data["lower"]  # type: float
        """The ``lower`` entry of the raw data."""
        self.upper_confidence_interval = self.data["upper"]  # type: float
        """The ``upper`` entry of the raw data."""
        self.pvalue = self.data["pvalue"]  # type: float
        """The ``pvalue`` entry of the raw data."""

    def get_raw(self):
        """
        :return: the drift model accuracy data
        :rtype: dict
        """
        return self.data
0 commit comments