Merge PR #60 utils: remove DSSInternalDict

instanceofme · web-flow · commit 56859e68219d · 2020-03-04T10:47:02.000+01:00
from tasks/dss70-remove-internal-dict
diff --git a/dataikuapi/dss/ml.py b/dataikuapi/dss/ml.py
@@ -1,7 +1,6 @@
 from ..utils import DataikuException
 from ..utils import DataikuUTF8CSVReader
 from ..utils import DataikuStreamedHttpUTF8CSVReader
-from ..utils import DSSInternalDict
 import json
 import time
 from .metrics import ComputedMetrics
@@ -824,28 +823,39 @@ def get_partial_dependencies(self):
         return DSSPartialDependencies(data)
 
 
-class DSSSubpopulationGlobal(DSSInternalDict):
+class DSSSubpopulationGlobal(object):
     """
     Object to read details of performance on global population used for subpopulation analyses.
 
     Do not create this object directly, use :meth:`DSSSubpopulationAnalyses.get_global()` instead
     """
 
     def __init__(self, data, prediction_type):
-        super(DSSSubpopulationGlobal, self).__init__(data)
+        self._internal_dict = data
         self.prediction_type = prediction_type
 
+    def get_raw(self):
+        """
+        Gets the raw dictionary of the global subpopulation performance
+
+        :rtype: dict
+        """
+        return self._internal_dict
+
+    def __repr__(self):
+        return "{cls}(prediction_type={type})".format(cls=self.__class__.__name__, type=self.prediction_type)
+
     def get_performance_metrics(self):
         """
         Gets the performance results of the global population used for the subpopulation analysis
         """
-        return self.get("performanceMetrics")
+        return self._internal_dict["performanceMetrics"]
 
     def get_prediction_info(self):
         """
         Gets the prediction info of the global population used for the subpopulation analysis
         """
-        global_metrics = self.get("perf").get("globalMetrics")
+        global_metrics = self._internal_dict["perf"]["globalMetrics"]
         if self.prediction_type == "BINARY_CLASSIFICATION":
             return {
                 "predictedPositiveRatio": global_metrics["predictionAvg"][0],
@@ -862,22 +872,37 @@ def get_prediction_info(self):
             }
 
 
-class DSSSubpopulationModality(DSSInternalDict):
+class DSSSubpopulationModality(object):
     """
     Object to read details of a subpopulation analysis modality
 
     Do not create this object directly, use :meth:`DSSSubpopulationAnalysis.get_modality_data(definition)` instead
     """
 
     def __init__(self, feature_name, computed_as_type, data, prediction_type):
-        super(DSSSubpopulationModality, self).__init__(data)
-
+        self._internal_dict = data
         self.prediction_type = prediction_type
         if computed_as_type == "CATEGORY":
             self.definition = DSSSubpopulationCategoryModalityDefinition(feature_name, data)
         elif computed_as_type == "NUMERIC":
             self.definition = DSSSubpopulationNumericModalityDefinition(feature_name, data)
-    
+
+    def get_raw(self):
+        """
+        Gets the raw dictionary of the subpopulation analysis modality
+
+        :rtype: dict
+        """
+        return self._internal_dict
+
+    def __repr__(self):  # `definition` is only set for CATEGORY / NUMERIC, so read it defensively: repr() must not raise
+        definition = getattr(self, "definition", None)
+        is_category = isinstance(definition, DSSSubpopulationCategoryModalityDefinition)
+        computed_as_type = None if definition is None else ("CATEGORY" if is_category else "NUMERIC")
+        return "{cls}(prediction_type={type}, feature={feature}, computed_as={computed_as_type})".format(
+            cls=self.__class__.__name__, type=self.prediction_type,
+            feature=getattr(definition, "feature_name", None), computed_as_type=computed_as_type)
+
     def get_definition(self):
         """
         Gets the definition of the subpopulation analysis modality
@@ -891,23 +916,23 @@ def is_excluded(self):
         """
         Whether modality has been excluded from analysis (e.g. too few rows in the subpopulation)
         """
-        return self.get("excluded", False)
+        return self._internal_dict.get("excluded", False)
 
     def get_performance_metrics(self):
         """
         Gets the performance results of the modality
         """
         if self.is_excluded():
             raise ValueError("Excluded modalities do not have performance metrics")
-        return self.get("performanceMetrics")
+        return self._internal_dict["performanceMetrics"]
 
     def get_prediction_info(self):
         """
         Gets the prediction info of the modality
         """
         if self.is_excluded():
             raise ValueError("Excluded modalities do not have prediction info")
-        global_metrics = self.get("perf").get("globalMetrics")
+        global_metrics = self._internal_dict["perf"]["globalMetrics"]
         if self.prediction_type == "BINARY_CLASSIFICATION":
             return {
                 "predictedPositiveRatio": global_metrics["predictionAvg"][0],
@@ -985,26 +1010,41 @@ def __repr__(self):
             return "DSSSubpopulationCategoryModalityDefinition(%s='%s')" % (self.feature_name, self.value)
 
 
-class DSSSubpopulationAnalysis(DSSInternalDict):
+class DSSSubpopulationAnalysis(object):
     """
     Object to read details of a subpopulation analysis of a trained model
 
     Do not create this object directly, use :meth:`DSSSubpopulationAnalyses.get_analysis(feature)` instead
     """
 
     def __init__(self, analysis, prediction_type):
-        super(DSSSubpopulationAnalysis, self).__init__(analysis)
-        self.computed_as_type = self.get("computed_as_type")
-        self.modalities = [DSSSubpopulationModality(analysis.get("feature"), self.computed_as_type, m, prediction_type) for m in self.get("modalities", [])]
+        self._internal_dict = analysis
+        self.computed_as_type = analysis["computed_as_type"]
+        self.modalities = [DSSSubpopulationModality(analysis["feature"], self.computed_as_type, m, prediction_type) for m in analysis.get("modalities", [])]
+
+    def get_raw(self):
+        """
+        Gets the raw dictionary of the subpopulation analysis
+
+        :rtype: dict
+        """
+        return self._internal_dict
+
+    def __repr__(self):
+        return "{cls}(computed_as_type={type}, feature={feature}, modalities_count={modalities_count})".format(
+            cls=self.__class__.__name__,
+            type=self.computed_as_type,
+            feature=self._internal_dict["feature"],
+            modalities_count=len(self.modalities))
 
     def get_computation_params(self):
         """
         Gets computation params
         """
         return {
-            "nbRecords":  self.get("nbRecords"),
-            "randomState":  self.get("randomState"),
-            "onSample":  self.get("onSample")
+            "nbRecords":  self._internal_dict["nbRecords"],
+            "randomState":  self._internal_dict["randomState"],
+            "onSample":  self._internal_dict["onSample"]
         }
     
     def list_modalities(self):
@@ -1046,88 +1086,122 @@ def get_modality_data(self, definition):
         raise ValueError("Modality not found: %s" % definition)
 
 
-class DSSSubpopulationAnalyses(DSSInternalDict):
+class DSSSubpopulationAnalyses(object):
     """
     Object to read details of subpopulation analyses of a trained model
 
     Do not create this object directly, use :meth:`DSSTrainedPredictionModelDetails.get_subpopulation_analyses()` instead
     """
 
     def __init__(self, data, prediction_type):
-        super(DSSSubpopulationAnalyses, self).__init__(data)
+        self._internal_dict = data
         self.prediction_type = prediction_type
         self.analyses = []
         for analysis in data.get("subpopulationAnalyses", []):
             self.analyses.append(DSSSubpopulationAnalysis(analysis, prediction_type))
-    
+
+    def get_raw(self):
+        """
+        Gets the raw dictionary of subpopulation analyses
+
+        :rtype: dict
+        """
+        return self._internal_dict
+
+    def __repr__(self):  # summarises the prediction type and the analysed feature names
+        return "{cls}(prediction_type={type}, analyses={analyses})".format(
+            cls=self.__class__.__name__, type=self.prediction_type, analyses=self.list_analyses())
+
     def get_global(self):
         """
         Retrieves information and performance on the full dataset used to compute the subpopulation analyses
         """
-        return DSSSubpopulationGlobal(self.get("global"), self.prediction_type)
+        return DSSSubpopulationGlobal(self._internal_dict["global"], self.prediction_type)
 
     def list_analyses(self):
         """
         Lists all features on which subpopulation analyses have been computed
         """
-        return [analysis.get("feature") for analysis in self.analyses]
+        return [analysis.get_raw()["feature"] for analysis in self.analyses]
     
     def get_analysis(self, feature):
         """
         Retrieves the subpopulation analysis for a particular feature
         """
         try:
-            return next(analysis for analysis in self.analyses if analysis.get("feature") == feature)
+            return next(analysis for analysis in self.analyses if analysis.get_raw()["feature"] == feature)
         except StopIteration:
             raise ValueError("Subpopulation analysis for feature '%s' cannot be found" % feature)
 
 
-class DSSPartialDependence(DSSInternalDict):
+class DSSPartialDependence(object):
     """
     Object to read details of partial dependence of a trained model
 
     Do not create this object directly, use :meth:`DSSPartialDependencies.get_partial_dependence(feature)` instead
     """
 
     def __init__(self, data):
-        super(DSSPartialDependence, self).__init__(data)
+        self._internal_dict = data
+
+    def get_raw(self):
+        """
+        Gets the raw dictionary of the partial dependence
+
+        :rtype: dict
+        """
+        return self._internal_dict
+
+    def __repr__(self):
+        return "{cls}(feature={feature})".format(cls=self.__class__.__name__, feature=self._internal_dict["feature"])
 
     def get_computation_params(self):
         """
         Gets computation params
         """
         return {
-            "nbRecords":  self.get("nbRecords"),
-            "randomState":  self.get("randomState"),
-            "onSample":  self.get("onSample")
+            "nbRecords":  self._internal_dict["nbRecords"],
+            "randomState":  self._internal_dict["randomState"],
+            "onSample":  self._internal_dict["onSample"]
         }
 
 
-class DSSPartialDependencies(DSSInternalDict):
+class DSSPartialDependencies(object):
     """
     Object to read details of partial dependencies of a trained model
 
     Do not create this object directly, use :meth:`DSSTrainedPredictionModelDetails.get_partial_dependencies()` instead
     """
 
     def __init__(self, data):
-        super(DSSPartialDependencies, self).__init__(data)
+        self._internal_dict = data
         self.partial_dependencies = []
         for pd in data.get("partialDependencies", []):
             self.partial_dependencies.append(DSSPartialDependence(pd))
 
+    def get_raw(self):
+        """
+        Gets the raw dictionary of partial dependencies
+
+        :rtype: dict
+        """
+        return self._internal_dict
+
+    def __repr__(self):
+        return "{cls}(features={features})".format(cls=self.__class__.__name__, features=self.list_features())
+
     def list_features(self):
         """
         Lists all features on which partial dependencies have been computed
         """
-        return [partial_dep.get("feature") for partial_dep in self.partial_dependencies]
+        return [partial_dep.get_raw()["feature"] for partial_dep in self.partial_dependencies]
 
     def get_partial_dependence(self, feature):
         """
         Retrieves the partial dependencies for a particular feature
         """
         try:
-            return next(pd for pd in self.partial_dependencies if pd.get("feature") == feature)
+            return next(pd for pd in self.partial_dependencies if pd.get_raw()["feature"] == feature)
         except StopIteration:
             raise ValueError("Partial dependence for feature '%s' cannot be found" % feature)
 
@@ -1521,4 +1595,3 @@ def guess(self, prediction_type=None):
             "PUT",
             "/projects/%s/models/lab/%s/%s/guess" % (self.project_key, self.analysis_id, self.mltask_id),
             params = obj)
-
diff --git a/dataikuapi/utils.py b/dataikuapi/utils.py
@@ -94,28 +94,3 @@ def str_to_bool(s):
                                                 doublequote=True):
                 yield [none_if_throws(caster)(val)
                         for (caster, val) in dku_zip_longest(casters, uncasted_tuple)]
-
-class DSSInternalDict(object):
-    """
-    Class that provides some helpers and an `_internal_dict` dict field that is the actual holder of the data.
-    """
-    
-    def __init__(self, orig_dict=None):
-        if orig_dict is None:
-            self._internal_dict = dict()
-        else:
-            self._internal_dict = orig_dict
-
-    def get(self, name, default=None):
-        return self._internal_dict.get(name, default)
-
-    def get_raw(self):
-        """
-        Gets the raw dictionary of the actual data
-
-        :rtype: dict
-        """
-        return self._internal_dict
-
-    def __repr__(self):
-        return self.__class__.__name__ + "(" + self._internal_dict.__repr__() + ")"