@@ -140,6 +140,40 @@ def get_split_params(self):
140140 """
141141 return PredictionSplitParamsHandler (self .mltask_settings )
142142
def split_ordered_by(self, feature_name, ascending=True):
    """
    Uses a variable to sort the data for train/test split and hyperparameter optimization

    The feature's missing-value handling is switched to "DROP_ROW". When the
    train/test policy is "SPLIT_SINGLE_DATASET", the split becomes "SORTED" on
    this feature. A "KFOLD" (resp. "SHUFFLE") hyperparameter search mode is
    replaced by "TIME_SERIES_KFOLD" (resp. "TIME_SERIES_SINGLE_SPLIT").

    :param str feature_name: Name of the variable to use
    :param bool ascending: True iff the test set is expected to have larger time values than the train set
    :raises ValueError: if feature_name is not a feature of this ML task; the settings are left untouched in that case
    """
    settings = self.mltask_settings
    per_feature = settings["preprocessing"]["per_feature"]

    # Validate before touching anything, so a bad name does not wipe an
    # ordering that was previously configured.
    if feature_name not in per_feature:
        raise ValueError("Feature %s doesn't exist in this ML task, can't use as time" % feature_name)

    # Start from a clean state; the reset lives in the sibling method.
    self.remove_ordered_split()

    time_settings = settings['time']
    time_settings['enabled'] = True
    time_settings['timeVariable'] = feature_name
    time_settings['ascending'] = ascending

    per_feature[feature_name]['missing_handling'] = "DROP_ROW"

    split_params = settings['splitParams']
    if split_params['ttPolicy'] == "SPLIT_SINGLE_DATASET":
        split_params['ssdSplitMode'] = "SORTED"
        split_params['ssdColumn'] = feature_name

    # Switch the hyperparameter search to its ordered counterpart; any other
    # mode is left as it is.
    grid_search = settings['modeling']['gridSearchParams']
    if grid_search['mode'] == "KFOLD":
        grid_search['mode'] = "TIME_SERIES_KFOLD"
    elif grid_search['mode'] == "SHUFFLE":
        grid_search['mode'] = "TIME_SERIES_SINGLE_SPLIT"
163+
def remove_ordered_split(self):
    """
    Remove time-based ordering.

    Disables the time settings and clears the time variable. When the
    train/test policy is "SPLIT_SINGLE_DATASET", the split goes back to
    "RANDOM" with no sort column. A "TIME_SERIES_KFOLD" (resp.
    "TIME_SERIES_SINGLE_SPLIT") hyperparameter search mode is turned back
    into "KFOLD" (resp. "SHUFFLE"); other modes are left unchanged.
    """
    settings = self.mltask_settings

    time_settings = settings['time']
    time_settings['enabled'] = False
    time_settings['timeVariable'] = None

    split_params = settings['splitParams']
    if split_params['ttPolicy'] == "SPLIT_SINGLE_DATASET":
        split_params['ssdSplitMode'] = "RANDOM"
        split_params['ssdColumn'] = None

    # Map each ordered search mode back to its unordered counterpart.
    grid_search = settings['modeling']['gridSearchParams']
    for ordered_mode, plain_mode in (
        ("TIME_SERIES_KFOLD", "KFOLD"),
        ("TIME_SERIES_SINGLE_SPLIT", "SHUFFLE"),
    ):
        if grid_search['mode'] == ordered_mode:
            grid_search['mode'] = plain_mode
            break
143177
144178 def get_feature_preprocessing (self , feature_name ):
145179 """
0 commit comments