Skip to content

Commit 56859e6

Browse files
authored
Merge PR #60 utils: remove DSSInternalDict
from tasks/dss70-remove-internal-dict
2 parents 166a248 + 9550f74 commit 56859e6

File tree

2 files changed

+108
-60
lines changed

2 files changed

+108
-60
lines changed

dataikuapi/dss/ml.py

Lines changed: 108 additions & 35 deletions
Original file line number | Diff line number | Diff line change
@@ -1,7 +1,6 @@
11
from ..utils import DataikuException
22
from ..utils import DataikuUTF8CSVReader
33
from ..utils import DataikuStreamedHttpUTF8CSVReader
4-
from ..utils import DSSInternalDict
54
import json
65
import time
76
from .metrics import ComputedMetrics
@@ -824,28 +823,39 @@ def get_partial_dependencies(self):
824823
return DSSPartialDependencies(data)
825824

826825

827-
class DSSSubpopulationGlobal(DSSInternalDict):
826+
class DSSSubpopulationGlobal(object):
828827
"""
829828
Object to read details of performance on global population used for subpopulation analyses.
830829
831830
Do not create this object directly, use :meth:`DSSSubpopulationAnalyses.get_global()` instead
832831
"""
833832

834833
def __init__(self, data, prediction_type):
835-
super(DSSSubpopulationGlobal, self).__init__(data)
834+
self._internal_dict = data
836835
self.prediction_type = prediction_type
837836

837+
def get_raw(self):
838+
"""
839+
Gets the raw dictionary of the global subpopulation performance
840+
841+
:rtype: dict
842+
"""
843+
return self._internal_dict
844+
845+
def __repr__(self):
846+
return "{cls}(prediction_type={type})".format(cls=self.__class__.__name__, type=self.prediction_type)
847+
838848
def get_performance_metrics(self):
839849
"""
840850
Gets the performance results of the global population used for the subpopulation analysis
841851
"""
842-
return self.get("performanceMetrics")
852+
return self._internal_dict["performanceMetrics"]
843853

844854
def get_prediction_info(self):
845855
"""
846856
Gets the prediction info of the global population used for the subpopulation analysis
847857
"""
848-
global_metrics = self.get("perf").get("globalMetrics")
858+
global_metrics = self._internal_dict["perf"]["globalMetrics"]
849859
if self.prediction_type == "BINARY_CLASSIFICATION":
850860
return {
851861
"predictedPositiveRatio": global_metrics["predictionAvg"][0],
@@ -862,22 +872,37 @@ def get_prediction_info(self):
862872
}
863873

864874

865-
class DSSSubpopulationModality(DSSInternalDict):
875+
class DSSSubpopulationModality(object):
866876
"""
867877
Object to read details of a subpopulation analysis modality
868878
869879
Do not create this object directly, use :meth:`DSSSubpopulationAnalysis.get_modality_data(definition)` instead
870880
"""
871881

872882
def __init__(self, feature_name, computed_as_type, data, prediction_type):
873-
super(DSSSubpopulationModality, self).__init__(data)
874-
883+
self._internal_dict = data
875884
self.prediction_type = prediction_type
876885
if computed_as_type == "CATEGORY":
877886
self.definition = DSSSubpopulationCategoryModalityDefinition(feature_name, data)
878887
elif computed_as_type == "NUMERIC":
879888
self.definition = DSSSubpopulationNumericModalityDefinition(feature_name, data)
880-
889+
890+
def get_raw(self):
891+
"""
892+
Gets the raw dictionary of the subpopulation analysis modality
893+
894+
:rtype: dict
895+
"""
896+
return self._internal_dict
897+
898+
def __repr__(self):
899+
computed_as_type = "CATEGORY" if isinstance(self.definition, DSSSubpopulationCategoryModalityDefinition) else 'NUMERIC'
900+
return "{cls}(prediction_type={type}, feature={feature}, computed_as={computed_as_type})".format(
901+
cls=self.__class__.__name__,
902+
type=self.prediction_type,
903+
feature=self.definition.feature_name,
904+
computed_as_type=computed_as_type)
905+
881906
def get_definition(self):
882907
"""
883908
Gets the definition of the subpopulation analysis modality
@@ -891,23 +916,23 @@ def is_excluded(self):
891916
"""
892917
Whether modality has been excluded from analysis (e.g. too few rows in the subpopulation)
893918
"""
894-
return self.get("excluded", False)
919+
return self._internal_dict.get("excluded", False)
895920

896921
def get_performance_metrics(self):
897922
"""
898923
Gets the performance results of the modality
899924
"""
900925
if self.is_excluded():
901926
raise ValueError("Excluded modalities do not have performance metrics")
902-
return self.get("performanceMetrics")
927+
return self._internal_dict["performanceMetrics"]
903928

904929
def get_prediction_info(self):
905930
"""
906931
Gets the prediction info of the modality
907932
"""
908933
if self.is_excluded():
909934
raise ValueError("Excluded modalities do not have prediction info")
910-
global_metrics = self.get("perf").get("globalMetrics")
935+
global_metrics = self._internal_dict["perf"]["globalMetrics"]
911936
if self.prediction_type == "BINARY_CLASSIFICATION":
912937
return {
913938
"predictedPositiveRatio": global_metrics["predictionAvg"][0],
@@ -985,26 +1010,41 @@ def __repr__(self):
9851010
return "DSSSubpopulationCategoryModalityDefinition(%s='%s')" % (self.feature_name, self.value)
9861011

9871012

988-
class DSSSubpopulationAnalysis(DSSInternalDict):
1013+
class DSSSubpopulationAnalysis(object):
9891014
"""
9901015
Object to read details of a subpopulation analysis of a trained model
9911016
9921017
Do not create this object directly, use :meth:`DSSSubpopulationAnalyses.get_analysis(feature)` instead
9931018
"""
9941019

9951020
def __init__(self, analysis, prediction_type):
996-
super(DSSSubpopulationAnalysis, self).__init__(analysis)
997-
self.computed_as_type = self.get("computed_as_type")
998-
self.modalities = [DSSSubpopulationModality(analysis.get("feature"), self.computed_as_type, m, prediction_type) for m in self.get("modalities", [])]
1021+
self._internal_dict = analysis
1022+
self.computed_as_type = analysis["computed_as_type"]
1023+
self.modalities = [DSSSubpopulationModality(analysis["feature"], self.computed_as_type, m, prediction_type) for m in analysis.get("modalities", [])]
1024+
1025+
def get_raw(self):
1026+
"""
1027+
Gets the raw dictionary of the subpopulation analysis
1028+
1029+
:rtype: dict
1030+
"""
1031+
return self._internal_dict
1032+
1033+
def __repr__(self):
1034+
return "{cls}(computed_as_type={type}, feature={feature}, modalities_count={modalities_count})".format(
1035+
cls=self.__class__.__name__,
1036+
type=self.computed_as_type,
1037+
feature=self._internal_dict["feature"],
1038+
modalities_count=len(self.modalities))
9991039

10001040
def get_computation_params(self):
10011041
"""
10021042
Gets computation params
10031043
"""
10041044
return {
1005-
"nbRecords": self.get("nbRecords"),
1006-
"randomState": self.get("randomState"),
1007-
"onSample": self.get("onSample")
1045+
"nbRecords": self._internal_dict["nbRecords"],
1046+
"randomState": self._internal_dict["randomState"],
1047+
"onSample": self._internal_dict["onSample"]
10081048
}
10091049

10101050
def list_modalities(self):
@@ -1046,88 +1086,122 @@ def get_modality_data(self, definition):
10461086
raise ValueError("Modality not found: %s" % definition)
10471087

10481088

1049-
class DSSSubpopulationAnalyses(DSSInternalDict):
1089+
class DSSSubpopulationAnalyses(object):
10501090
"""
10511091
Object to read details of subpopulation analyses of a trained model
10521092
10531093
Do not create this object directly, use :meth:`DSSTrainedPredictionModelDetails.get_subpopulation_analyses()` instead
10541094
"""
10551095

10561096
def __init__(self, data, prediction_type):
1057-
super(DSSSubpopulationAnalyses, self).__init__(data)
1097+
self._internal_dict = data
10581098
self.prediction_type = prediction_type
10591099
self.analyses = []
10601100
for analysis in data.get("subpopulationAnalyses", []):
10611101
self.analyses.append(DSSSubpopulationAnalysis(analysis, prediction_type))
1062-
1102+
1103+
def get_raw(self):
1104+
"""
1105+
Gets the raw dictionary of subpopulation analyses
1106+
1107+
:rtype: dict
1108+
"""
1109+
return self._internal_dict
1110+
1111+
def __repr__(self):
1112+
return "{cls}(prediction_type={type}, analyses={analyses})".format(cls=self.__class__.__name__,
1113+
type=self.prediction_type,
1114+
analyses=self.list_analyses())
10631115
def get_global(self):
10641116
"""
10651117
Retrieves information and performance on the full dataset used to compute the subpopulation analyses
10661118
"""
1067-
return DSSSubpopulationGlobal(self.get("global"), self.prediction_type)
1119+
return DSSSubpopulationGlobal(self._internal_dict["global"], self.prediction_type)
10681120

10691121
def list_analyses(self):
10701122
"""
10711123
Lists all features on which subpopulation analyses have been computed
10721124
"""
1073-
return [analysis.get("feature") for analysis in self.analyses]
1125+
return [analysis.get_raw()["feature"] for analysis in self.analyses]
10741126

10751127
def get_analysis(self, feature):
10761128
"""
10771129
Retrieves the subpopulation analysis for a particular feature
10781130
"""
10791131
try:
1080-
return next(analysis for analysis in self.analyses if analysis.get("feature") == feature)
1132+
return next(analysis for analysis in self.analyses if analysis.get_raw()["feature"] == feature)
10811133
except StopIteration:
10821134
raise ValueError("Subpopulation analysis for feature '%s' cannot be found" % feature)
10831135

10841136

1085-
class DSSPartialDependence(DSSInternalDict):
1137+
class DSSPartialDependence(object):
10861138
"""
10871139
Object to read details of partial dependence of a trained model
10881140
10891141
Do not create this object directly, use :meth:`DSSPartialDependencies.get_partial_dependence(feature)` instead
10901142
"""
10911143

10921144
def __init__(self, data):
1093-
super(DSSPartialDependence, self).__init__(data)
1145+
self._internal_dict = data
1146+
1147+
def get_raw(self):
1148+
"""
1149+
Gets the raw dictionary of the partial dependence
1150+
1151+
:rtype: dict
1152+
"""
1153+
return self._internal_dict
1154+
1155+
def __repr__(self):
1156+
return "{cls}(feature={feature})".format(cls=self.__class__.__name__, feature=self._internal_dict["feature"])
10941157

10951158
def get_computation_params(self):
10961159
"""
10971160
Gets computation params
10981161
"""
10991162
return {
1100-
"nbRecords": self.get("nbRecords"),
1101-
"randomState": self.get("randomState"),
1102-
"onSample": self.get("onSample")
1163+
"nbRecords": self._internal_dict["nbRecords"],
1164+
"randomState": self._internal_dict["randomState"],
1165+
"onSample": self._internal_dict["onSample"]
11031166
}
11041167

11051168

1106-
class DSSPartialDependencies(DSSInternalDict):
1169+
class DSSPartialDependencies(object):
11071170
"""
11081171
Object to read details of partial dependencies of a trained model
11091172
11101173
Do not create this object directly, use :meth:`DSSTrainedPredictionModelDetails.get_partial_dependencies()` instead
11111174
"""
11121175

11131176
def __init__(self, data):
1114-
super(DSSPartialDependencies, self).__init__(data)
1177+
self._internal_dict = data
11151178
self.partial_dependencies = []
11161179
for pd in data.get("partialDependencies", []):
11171180
self.partial_dependencies.append(DSSPartialDependence(pd))
11181181

1182+
def get_raw(self):
1183+
"""
1184+
Gets the raw dictionary of partial dependencies
1185+
1186+
:rtype: dict
1187+
"""
1188+
return self._internal_dict
1189+
1190+
def __repr__(self):
1191+
return "{cls}(features={features})".format(cls=self.__class__.__name__, features=self.list_features())
1192+
11191193
def list_features(self):
11201194
"""
11211195
Lists all features on which partial dependencies have been computed
11221196
"""
1123-
return [partial_dep.get("feature") for partial_dep in self.partial_dependencies]
1197+
return [partial_dep.get_raw()["feature"] for partial_dep in self.partial_dependencies]
11241198

11251199
def get_partial_dependence(self, feature):
11261200
"""
11271201
Retrieves the partial dependencies for a particular feature
11281202
"""
11291203
try:
1130-
return next(pd for pd in self.partial_dependencies if pd.get("feature") == feature)
1204+
return next(pd for pd in self.partial_dependencies if pd.get_raw()["feature"] == feature)
11311205
except StopIteration:
11321206
raise ValueError("Partial dependence for feature '%s' cannot be found" % feature)
11331207

@@ -1521,4 +1595,3 @@ def guess(self, prediction_type=None):
15211595
"PUT",
15221596
"/projects/%s/models/lab/%s/%s/guess" % (self.project_key, self.analysis_id, self.mltask_id),
15231597
params = obj)
1524-

dataikuapi/utils.py

Lines changed: 0 additions & 25 deletions
Original file line number | Diff line number | Diff line change
@@ -94,28 +94,3 @@ def str_to_bool(s):
9494
doublequote=True):
9595
yield [none_if_throws(caster)(val)
9696
for (caster, val) in dku_zip_longest(casters, uncasted_tuple)]
97-
98-
class DSSInternalDict(object):
99-
"""
100-
Class that provides some helpers and an `_internal_dict` dict field that is the actual holder of the data.
101-
"""
102-
103-
def __init__(self, orig_dict=None):
104-
if orig_dict is None:
105-
self._internal_dict = dict()
106-
else:
107-
self._internal_dict = orig_dict
108-
109-
def get(self, name, default=None):
110-
return self._internal_dict.get(name, default)
111-
112-
def get_raw(self):
113-
"""
114-
Gets the raw dictionary of the actual data
115-
116-
:rtype: dict
117-
"""
118-
return self._internal_dict
119-
120-
def __repr__(self):
121-
return self.__class__.__name__ + "(" + self._internal_dict.__repr__() + ")"

0 commit comments

Comments
 (0)