1111import warnings
1212import numpy as np
1313
14- from .utils .numerical_transformer import NumericalTransformer
15- from .utils .categorical_transformer import CategoricalTransformer
16- from .utils .datetime_transformer import DatetimeTransformer
14+ # from .utils.numerical_transformer import NumericalTransformer
15+ # from .utils.categorical_transformer import CategoricalTransformer
16+ # from .utils.datetime_transformer import DatetimeTransformer
1717
1818# UNCOMMENT FOR DEBUGGING
19- # from utils.numerical_transformer import NumericalTransformer
20- # from utils.categorical_transformer import CategoricalTransformer
21- # from utils.datetime_transformer import DatetimeTransformer
19+ from utils .numerical_transformer import NumericalTransformer
20+ from utils .categorical_transformer import CategoricalTransformer
21+ from utils .datetime_transformer import DatetimeTransformer
2222
2323class Preprocessor :
2424 ML_TASKS = {"classification" , "regression" , None }
25- NUM_FILL_NULL_STRATEGIES = {"interpolate" ,"forward" , "backward" , "min" , "max" , "mean" , "zero" , "one" }
25+ NUM_FILL_NULL_STRATEGIES = {"none" , " interpolate" ,"forward" , "backward" , "min" , "max" , "mean" , "zero" , "one" }
2626 SCALING_STRATEGIES = {"none" , "normalize" , "standardize" , "quantile" }
2727 """
2828 A class for preprocessing datasets based on polars, including feature selection, handling missing values, scaling,
@@ -120,7 +120,7 @@ def __init__(
120120 missing_values_threshold : float = 0.999 ,
121121 n_bins : int = 0 ,
122122 scaling : Literal ["none" , "normalize" , "standardize" , "quantile" ] = "none" ,
123- num_fill_null : Literal ["interpolate" ,"forward" , "backward" , "min" , "max" , "mean" , "zero" , "one" ] = "mean " ,
123+ num_fill_null : Literal ["none" , " interpolate" ,"forward" , "backward" , "min" , "max" , "mean" , "zero" , "one" ] = "none " ,
124124 unseen_labels = 'ignore' ,
125125 ml_task : Literal ["classification" , "regression" , None ] = None ,
126126 target_column : str = None ,
@@ -662,6 +662,7 @@ def get_categorical_features(self) -> Tuple[str]:
662662 # real_data = pd.read_csv(os.path.join(file_path,"dataset.csv"))
663663 # # real_data["income"] = real_data["income"].map({"<=50K": 0, ">50K": 1})
664664
665- preprocessor = Preprocessor (real_data , num_fill_null = np . nan , scaling = 'standardize ' )
665+ preprocessor = Preprocessor (real_data , num_fill_null = "none" , scaling = 'quantile ' )
666666 real_data_preprocessed = preprocessor .transform (real_data )
667- df_inverse = preprocessor .inverse_transform (real_data_preprocessed )
667+ df_inverse = preprocessor .inverse_transform (real_data_preprocessed )
668+ pass
0 commit comments