Skip to content

Commit 1667e25

Browse files
committed
utils: remove DSSInternalDict
1 parent: 166a248 · commit: 1667e25

File tree

2 files changed

+113
-58
lines changed

2 files changed

+113
-58
lines changed

dataikuapi/dss/ml.py

Lines changed: 113 additions & 33 deletions
Original file line number | Diff line number | Diff line change
@@ -1,7 +1,6 @@
11
from ..utils import DataikuException
22
from ..utils import DataikuUTF8CSVReader
33
from ..utils import DataikuStreamedHttpUTF8CSVReader
4-
from ..utils import DSSInternalDict
54
import json
65
import time
76
from .metrics import ComputedMetrics
@@ -824,28 +823,42 @@ def get_partial_dependencies(self):
824823
return DSSPartialDependencies(data)
825824

826825

827-
class DSSSubpopulationGlobal(DSSInternalDict):
826+
class DSSSubpopulationGlobal(object):
828827
"""
829828
Object to read details of performance on global population used for subpopulation analyses.
830829
831830
Do not create this object directly, use :meth:`DSSSubpopulationAnalyses.get_global()` instead
832831
"""
833832

834833
def __init__(self, data, prediction_type):
835-
super(DSSSubpopulationGlobal, self).__init__(data)
834+
if data is None:
835+
self._internal_dict = dict()
836+
else:
837+
self._internal_dict = data
836838
self.prediction_type = prediction_type
837839

840+
def get_raw(self):
841+
"""
842+
Gets the raw dictionary of the actual data
843+
844+
:rtype: dict
845+
"""
846+
return self._internal_dict
847+
848+
def __repr__(self):
849+
return self.__class__.__name__ + "(" + self._internal_dict.__repr__() + ")"
850+
838851
def get_performance_metrics(self):
839852
"""
840853
Gets the performance results of the global population used for the subpopulation analysis
841854
"""
842-
return self.get("performanceMetrics")
855+
return self._internal_dict["performanceMetrics"]
843856

844857
def get_prediction_info(self):
845858
"""
846859
Gets the prediction info of the global population used for the subpopulation analysis
847860
"""
848-
global_metrics = self.get("perf").get("globalMetrics")
861+
global_metrics = self._internal_dict["perf"]["globalMetrics"]
849862
if self.prediction_type == "BINARY_CLASSIFICATION":
850863
return {
851864
"predictedPositiveRatio": global_metrics["predictionAvg"][0],
@@ -862,22 +875,36 @@ def get_prediction_info(self):
862875
}
863876

864877

865-
class DSSSubpopulationModality(DSSInternalDict):
878+
class DSSSubpopulationModality(object):
866879
"""
867880
Object to read details of a subpopulation analysis modality
868881
869882
Do not create this object directly, use :meth:`DSSSubpopulationAnalysis.get_modality_data(definition)` instead
870883
"""
871884

872885
def __init__(self, feature_name, computed_as_type, data, prediction_type):
873-
super(DSSSubpopulationModality, self).__init__(data)
886+
if data is None:
887+
self._internal_dict = dict()
888+
else:
889+
self._internal_dict = data
874890

875891
self.prediction_type = prediction_type
876892
if computed_as_type == "CATEGORY":
877893
self.definition = DSSSubpopulationCategoryModalityDefinition(feature_name, data)
878894
elif computed_as_type == "NUMERIC":
879895
self.definition = DSSSubpopulationNumericModalityDefinition(feature_name, data)
880-
896+
897+
def get_raw(self):
898+
"""
899+
Gets the raw dictionary of the actual data
900+
901+
:rtype: dict
902+
"""
903+
return self._internal_dict
904+
905+
def __repr__(self):
906+
return self.__class__.__name__ + "(" + self._internal_dict.__repr__() + ")"
907+
881908
def get_definition(self):
882909
"""
883910
Gets the definition of the subpopulation analysis modality
@@ -891,23 +918,23 @@ def is_excluded(self):
891918
"""
892919
Whether modality has been excluded from analysis (e.g. too few rows in the subpopulation)
893920
"""
894-
return self.get("excluded", False)
921+
return self._internal_dict.get("excluded", False)
895922

896923
def get_performance_metrics(self):
897924
"""
898925
Gets the performance results of the modality
899926
"""
900927
if self.is_excluded():
901928
raise ValueError("Excluded modalities do not have performance metrics")
902-
return self.get("performanceMetrics")
929+
return self._internal_dict["performanceMetrics"]
903930

904931
def get_prediction_info(self):
905932
"""
906933
Gets the prediction info of the modality
907934
"""
908935
if self.is_excluded():
909936
raise ValueError("Excluded modalities do not have prediction info")
910-
global_metrics = self.get("perf").get("globalMetrics")
937+
global_metrics = self._internal_dict["perf"]["globalMetrics"]
911938
if self.prediction_type == "BINARY_CLASSIFICATION":
912939
return {
913940
"predictedPositiveRatio": global_metrics["predictionAvg"][0],
@@ -985,26 +1012,37 @@ def __repr__(self):
9851012
return "DSSSubpopulationCategoryModalityDefinition(%s='%s')" % (self.feature_name, self.value)
9861013

9871014

988-
class DSSSubpopulationAnalysis(DSSInternalDict):
1015+
class DSSSubpopulationAnalysis(object):
9891016
"""
9901017
Object to read details of a subpopulation analysis of a trained model
9911018
9921019
Do not create this object directly, use :meth:`DSSSubpopulationAnalyses.get_analysis(feature)` instead
9931020
"""
9941021

9951022
def __init__(self, analysis, prediction_type):
996-
super(DSSSubpopulationAnalysis, self).__init__(analysis)
997-
self.computed_as_type = self.get("computed_as_type")
998-
self.modalities = [DSSSubpopulationModality(analysis.get("feature"), self.computed_as_type, m, prediction_type) for m in self.get("modalities", [])]
1023+
self._internal_dict = analysis
1024+
self.computed_as_type = analysis["computed_as_type"]
1025+
self.modalities = [DSSSubpopulationModality(analysis["feature"], self.computed_as_type, m, prediction_type) for m in analysis.get("modalities", [])]
1026+
1027+
def get_raw(self):
1028+
"""
1029+
Gets the raw dictionary of the actual data
1030+
1031+
:rtype: dict
1032+
"""
1033+
return self._internal_dict
1034+
1035+
def __repr__(self):
1036+
return self.__class__.__name__ + "(" + self._internal_dict.__repr__() + ")"
9991037

10001038
def get_computation_params(self):
10011039
"""
10021040
Gets computation params
10031041
"""
10041042
return {
1005-
"nbRecords": self.get("nbRecords"),
1006-
"randomState": self.get("randomState"),
1007-
"onSample": self.get("onSample")
1043+
"nbRecords": self._internal_dict["nbRecords"],
1044+
"randomState": self._internal_dict["randomState"],
1045+
"onSample": self._internal_dict["onSample"]
10081046
}
10091047

10101048
def list_modalities(self):
@@ -1046,88 +1084,130 @@ def get_modality_data(self, definition):
10461084
raise ValueError("Modality not found: %s" % definition)
10471085

10481086

1049-
class DSSSubpopulationAnalyses(DSSInternalDict):
1087+
class DSSSubpopulationAnalyses(object):
10501088
"""
10511089
Object to read details of subpopulation analyses of a trained model
10521090
10531091
Do not create this object directly, use :meth:`DSSTrainedPredictionModelDetails.get_subpopulation_analyses()` instead
10541092
"""
10551093

10561094
def __init__(self, data, prediction_type):
1057-
super(DSSSubpopulationAnalyses, self).__init__(data)
1095+
if data is None:
1096+
self._internal_dict = dict()
1097+
else:
1098+
self._internal_dict = data
10581099
self.prediction_type = prediction_type
10591100
self.analyses = []
10601101
for analysis in data.get("subpopulationAnalyses", []):
10611102
self.analyses.append(DSSSubpopulationAnalysis(analysis, prediction_type))
1062-
1103+
1104+
def get_raw(self):
1105+
"""
1106+
Gets the raw dictionary of the actual data
1107+
1108+
:rtype: dict
1109+
"""
1110+
return self._internal_dict
1111+
1112+
def __repr__(self):
1113+
return self.__class__.__name__ + "(" + self._internal_dict.__repr__() + ")"
1114+
10631115
def get_global(self):
10641116
"""
10651117
Retrieves information and performance on the full dataset used to compute the subpopulation analyses
10661118
"""
1067-
return DSSSubpopulationGlobal(self.get("global"), self.prediction_type)
1119+
return DSSSubpopulationGlobal(self._internal_dict["global"], self.prediction_type)
10681120

10691121
def list_analyses(self):
10701122
"""
10711123
Lists all features on which subpopulation analyses have been computed
10721124
"""
1073-
return [analysis.get("feature") for analysis in self.analyses]
1125+
return [analysis.get_raw()["feature"] for analysis in self.analyses]
10741126

10751127
def get_analysis(self, feature):
10761128
"""
10771129
Retrieves the subpopulation analysis for a particular feature
10781130
"""
10791131
try:
1080-
return next(analysis for analysis in self.analyses if analysis.get("feature") == feature)
1132+
return next(analysis for analysis in self.analyses if analysis.get_raw()["feature"] == feature)
10811133
except StopIteration:
10821134
raise ValueError("Subpopulation analysis for feature '%s' cannot be found" % feature)
10831135

10841136

1085-
class DSSPartialDependence(DSSInternalDict):
1137+
class DSSPartialDependence(object):
10861138
"""
10871139
Object to read details of partial dependence of a trained model
10881140
10891141
Do not create this object directly, use :meth:`DSSPartialDependencies.get_partial_dependence(feature)` instead
10901142
"""
10911143

10921144
def __init__(self, data):
1093-
super(DSSPartialDependence, self).__init__(data)
1145+
if data is None:
1146+
self._internal_dict = dict()
1147+
else:
1148+
self._internal_dict = data
1149+
1150+
def get_raw(self):
1151+
"""
1152+
Gets the raw dictionary of the actual data
1153+
1154+
:rtype: dict
1155+
"""
1156+
return self._internal_dict
1157+
1158+
def __repr__(self):
1159+
return self.__class__.__name__ + "(" + self._internal_dict.__repr__() + ")"
10941160

10951161
def get_computation_params(self):
10961162
"""
10971163
Gets computation params
10981164
"""
10991165
return {
1100-
"nbRecords": self.get("nbRecords"),
1101-
"randomState": self.get("randomState"),
1102-
"onSample": self.get("onSample")
1166+
"nbRecords": self._internal_dict["nbRecords"],
1167+
"randomState": self._internal_dict["randomState"],
1168+
"onSample": self._internal_dict["onSample"]
11031169
}
11041170

11051171

1106-
class DSSPartialDependencies(DSSInternalDict):
1172+
class DSSPartialDependencies(object):
11071173
"""
11081174
Object to read details of partial dependencies of a trained model
11091175
11101176
Do not create this object directly, use :meth:`DSSTrainedPredictionModelDetails.get_partial_dependencies()` instead
11111177
"""
11121178

11131179
def __init__(self, data):
1114-
super(DSSPartialDependencies, self).__init__(data)
1180+
if data is None:
1181+
self._internal_dict = dict()
1182+
else:
1183+
self._internal_dict = data
11151184
self.partial_dependencies = []
11161185
for pd in data.get("partialDependencies", []):
11171186
self.partial_dependencies.append(DSSPartialDependence(pd))
11181187

1188+
def get_raw(self):
1189+
"""
1190+
Gets the raw dictionary of the actual data
1191+
1192+
:rtype: dict
1193+
"""
1194+
return self._internal_dict
1195+
1196+
def __repr__(self):
1197+
return self.__class__.__name__ + "(" + self._internal_dict.__repr__() + ")"
1198+
11191199
def list_features(self):
11201200
"""
11211201
Lists all features on which partial dependencies have been computed
11221202
"""
1123-
return [partial_dep.get("feature") for partial_dep in self.partial_dependencies]
1203+
return [partial_dep.get_raw()["feature"] for partial_dep in self.partial_dependencies]
11241204

11251205
def get_partial_dependence(self, feature):
11261206
"""
11271207
Retrieves the partial dependencies for a particular feature
11281208
"""
11291209
try:
1130-
return next(pd for pd in self.partial_dependencies if pd.get("feature") == feature)
1210+
return next(pd for pd in self.partial_dependencies if pd.get_raw()["feature"] == feature)
11311211
except StopIteration:
11321212
raise ValueError("Partial dependence for feature '%s' cannot be found" % feature)
11331213

dataikuapi/utils.py

Lines changed: 0 additions & 25 deletions
Original file line number | Diff line number | Diff line change
@@ -94,28 +94,3 @@ def str_to_bool(s):
9494
doublequote=True):
9595
yield [none_if_throws(caster)(val)
9696
for (caster, val) in dku_zip_longest(casters, uncasted_tuple)]
97-
98-
class DSSInternalDict(object):
99-
"""
100-
Class that provides some helpers and an `_internal_dict` dict field that is the actual holder of the data.
101-
"""
102-
103-
def __init__(self, orig_dict=None):
104-
if orig_dict is None:
105-
self._internal_dict = dict()
106-
else:
107-
self._internal_dict = orig_dict
108-
109-
def get(self, name, default=None):
110-
return self._internal_dict.get(name, default)
111-
112-
def get_raw(self):
113-
"""
114-
Gets the raw dictionary of the actual data
115-
116-
:rtype: dict
117-
"""
118-
return self._internal_dict
119-
120-
def __repr__(self):
121-
return self.__class__.__name__ + "(" + self._internal_dict.__repr__() + ")"

0 commit comments

Comments
 (0)