77
88class TimeSeriesDataset (object ):
99 def __init__ (self , data , categorical_cols , target_col , seq_length , prediction_window = 1 ):
10- '''
10+ """
1111 :param data: dataset of type pandas.DataFrame
1212 :param categorical_cols: name of the categorical columns, if None pass empty list
1313 :param target_col: name of the targeted column
1414 :param seq_length: window length to use
1515 :param prediction_window: window length to predict
16- '''
16+ """
1717 self .data = data
1818 self .categorical_cols = categorical_cols
1919 self .numerical_cols = list (set (data .columns ) - set (categorical_cols ) - set (target_col ))
2020 self .target_col = target_col
2121 self .seq_length = seq_length
2222 self .prediction_window = prediction_window
23- self .preprocessor = None
24-
25- def preprocess_data (self ):
26- '''Preprocessing function'''
27- X = self .data .drop (self .target_col , axis = 1 )
28- y = self .data [self .target_col ]
2923
3024 self .preprocessor = ColumnTransformer (
3125 [("scaler" , StandardScaler (), self .numerical_cols ),
3226 ("encoder" , OneHotEncoder (), self .categorical_cols )],
3327 remainder = "passthrough"
3428 )
29+ if self .target_col :
30+ self .y_scaler = StandardScaler ()
31+
32+ def preprocess_data (self ):
33+ """Preprocessing function"""
34+ X = self .data .drop (self .target_col , axis = 1 )
35+ y = self .data [self .target_col ]
3536
3637 X_train , X_test , y_train , y_test = train_test_split (X , y , train_size = 0.8 , shuffle = False )
3738 X_train = self .preprocessor .fit_transform (X_train )
3839 X_test = self .preprocessor .transform (X_test )
3940
4041 if self .target_col :
41- return X_train , X_test , y_train .values , y_test .values
42+ y_train = self .y_scaler .fit_transform (y_train )
43+ y_test = self .y_scaler .transform (y_test )
44+ return X_train , X_test , y_train , y_test
4245 return X_train , X_test
4346
4447 def frame_series (self , X , y = None ):
45- '''
48+ """
4649 Function used to prepare the data for time series prediction
4750 :param X: set of features
4851 :param y: targeted value to predict
4952 :return: TensorDataset
50- '''
53+ """
5154 nb_obs , nb_features = X .shape
5255 features , target , y_hist = [], [], []
5356
@@ -69,11 +72,11 @@ def frame_series(self, X, y=None):
6972 return TensorDataset (features_var )
7073
7174 def get_loaders (self , batch_size : int ):
72- '''
75+ """
7376 Preprocess and frame the dataset
7477 :param batch_size: batch size
7578 :return: DataLoaders associated to training and testing data
76- '''
79+ """
7780 X_train , X_test , y_train , y_test = self .preprocess_data ()
7881 nb_features = X_train .shape [1 ]
7982
@@ -83,3 +86,9 @@ def get_loaders(self, batch_size: int):
8386 train_iter = DataLoader (train_dataset , batch_size = batch_size , shuffle = False , drop_last = True )
8487 test_iter = DataLoader (test_dataset , batch_size = batch_size , shuffle = False , drop_last = True )
8588 return train_iter , test_iter , nb_features
89+
90+ def invert_scale (self , predictions ):
91+ if isinstance (predictions , torch .Tensor ):
92+ predictions = predictions .numpy ()
93+ unscaled = self .y_scaler .inverse_transform (predictions )
94+ return torch .Tensor (unscaled )
0 commit comments