1
1
import json
2
- from typing import Any , Dict , List , Optional , Tuple , Type , Union
2
+ from typing import Any , Optional , Union
3
3
4
4
import numpy as np
5
5
import pandas as pd
@@ -102,13 +102,13 @@ def _apply_encoding(
102
102
self ,
103
103
X : pd .DataFrame ,
104
104
y : Optional [pd .Series ],
105
- categorical_features : List [str ],
106
- numeric_features : List [str ],
105
+ categorical_features : list [str ],
106
+ numeric_features : list [str ],
107
107
saved_encoder : Optional [ColumnTransformer ] = None ,
108
108
* ,
109
109
fit_encoders : bool = False ,
110
110
log : Optional [bool ] = False ,
111
- ) -> Tuple [pd .DataFrame , Optional [pd .Series ], Optional [ColumnTransformer ]]:
111
+ ) -> tuple [pd .DataFrame , Optional [pd .Series ], Optional [ColumnTransformer ]]:
112
112
"""Apply the encoding to the data.
113
113
114
114
Parameters
@@ -117,9 +117,9 @@ def _apply_encoding(
117
117
The DataFrame with features to encode
118
118
y : pd.Series, optional
119
119
The target series. If provided, the target column will be encoded, by default None
120
- categorical_features : List [str]
120
+ categorical_features : list [str]
121
121
Categorical features
122
- numeric_features : List [str]
122
+ numeric_features : list [str]
123
123
Numeric features
124
124
fit_encoders : bool, optional
125
125
Whether to fit the encoders, by default False
@@ -130,7 +130,7 @@ def _apply_encoding(
130
130
131
131
Returns
132
132
-------
133
- Tuple [pd.DataFrame, Optional[pd.Series], Optional[ColumnTransformer]]
133
+ tuple [pd.DataFrame, Optional[pd.Series], Optional[ColumnTransformer]]
134
134
The encoded data, the target column, and the encoder
135
135
"""
136
136
if not fit_encoders and not saved_encoder :
@@ -182,7 +182,7 @@ def _apply_encoding(
182
182
183
183
return encoded_data , targets , encoder
184
184
185
- def _get_feature_types (self , X : pd .DataFrame ) -> Tuple [ List [str ], List [str ]]:
185
+ def _get_feature_types (self , X : pd .DataFrame ) -> tuple [ list [str ], list [str ]]:
186
186
"""Get categorical and numeric feature lists.
187
187
188
188
Parameters
@@ -192,7 +192,7 @@ def _get_feature_types(self, X: pd.DataFrame) -> Tuple[List[str], List[str]]:
192
192
193
193
Returns
194
194
-------
195
- Tuple[List [str], List [str]]
195
+ tuple[list [str], list [str]]
196
196
Categorical and numeric features
197
197
"""
198
198
categorical_features = [col for col in X .columns if X [col ].dtype in ["object" , "category" ]]
@@ -201,8 +201,8 @@ def _get_feature_types(self, X: pd.DataFrame) -> Tuple[List[str], List[str]]:
201
201
return categorical_features , numeric_features
202
202
203
203
def _split_categorical_features (
204
- self , df : pd .DataFrame , categorical_features : List [str ]
205
- ) -> Tuple [ List [str ], List [str ]]:
204
+ self , df : pd .DataFrame , categorical_features : list [str ]
205
+ ) -> tuple [ list [str ], list [str ]]:
206
206
"""Split categorical features into low and high cardinality features."""
207
207
low_cardinality_features = [
208
208
col for col in categorical_features if df [col ].nunique () <= self .cardinality_threshold
@@ -214,7 +214,7 @@ def _split_categorical_features(
214
214
215
215
def _get_encoder_class_and_params (
216
216
self , encoder_name : str
217
- ) -> Tuple [Union [Type [OrdinalEncoder ], Type [TargetEncoder ]], dict [str , Any ]]:
217
+ ) -> tuple [Union [type [OrdinalEncoder ], type [TargetEncoder ]], dict [str , Any ]]:
218
218
"""Map encoder name to the corresponding encoder class."""
219
219
encoder = self .ENCODER_MAP .get (encoder_name )
220
220
encoder_params = self .ENCODER_MAP_PARAMS .get (encoder_name )
@@ -230,9 +230,9 @@ def _get_encoder_class_and_params(
230
230
def _log_encoder_override (
231
231
self ,
232
232
feature : str ,
233
- encoder_class : Type [Union [OrdinalEncoder , TargetEncoder ]],
234
- high_cardinality_features : List [str ],
235
- low_cardinality_features : List [str ],
233
+ encoder_class : type [Union [OrdinalEncoder , TargetEncoder ]],
234
+ high_cardinality_features : list [str ],
235
+ low_cardinality_features : list [str ],
236
236
) -> None :
237
237
if feature in high_cardinality_features :
238
238
self .logger .info (
@@ -257,9 +257,9 @@ def _log_encoder_override(
257
257
258
258
def _create_column_transformer (
259
259
self ,
260
- high_cardinality_features : List [str ],
261
- low_cardinality_features : List [str ],
262
- numeric_features : List [str ],
260
+ high_cardinality_features : list [str ],
261
+ low_cardinality_features : list [str ],
262
+ numeric_features : list [str ],
263
263
) -> ColumnTransformer :
264
264
"""Create a ColumnTransformer for encoding."""
265
265
transformers = []
@@ -345,7 +345,7 @@ def _restore_column_order(self, df: pd.DataFrame, encoded_data: pd.DataFrame) ->
345
345
return encoded_data [new_column_order ]
346
346
347
347
def _convert_ordinal_encoded_columns_to_int (
348
- self , encoded_data : pd .DataFrame , encoded_feature_map : Dict [str , str ]
348
+ self , encoded_data : pd .DataFrame , encoded_feature_map : dict [str , str ]
349
349
) -> pd .DataFrame :
350
350
"""Convert ordinal encoded columns to the smallest possible integer dtype."""
351
351
ordinal_encoded_features = [
@@ -361,7 +361,7 @@ def _convert_ordinal_encoded_columns_to_int(
361
361
# since instead of raising the error and being converted to "integer" in the
362
362
# except, it will remain as a float64 silently.
363
363
if (encoded_data [col ] <= 0 ).any ():
364
- raise ValueError ("Column contains negative values." )
364
+ raise ValueError ("Column contains negative values." ) # noqa: TRY301, EM101
365
365
encoded_data [col ] = pd .to_numeric (encoded_data [col ].values , downcast = "unsigned" )
366
366
except ValueError :
367
367
try :
@@ -401,7 +401,7 @@ def _convert_float64_to_float32(self, encoded_data: pd.DataFrame) -> pd.DataFram
401
401
encoded_data [col ] = encoded_data [col ].astype (np .float32 )
402
402
return encoded_data
403
403
404
- def _create_feature_encoder_map (self , column_transformer : ColumnTransformer ) -> Dict [str , str ]:
404
+ def _create_feature_encoder_map (self , column_transformer : ColumnTransformer ) -> dict [str , str ]:
405
405
"""Create a dictionary to store the encoder used for each feature."""
406
406
feature_encoder_map = {}
407
407
transformed_features = column_transformer .get_feature_names_out ()
@@ -419,11 +419,11 @@ def _create_feature_encoder_map(self, column_transformer: ColumnTransformer) ->
419
419
420
420
def _log_feature_info (
421
421
self ,
422
- categorical_features : List [str ],
423
- numeric_features : List [str ],
424
- low_cardinality_features : List [str ],
425
- high_cardinality_features : List [str ],
426
- feature_encoder_map : Dict [str , str ],
422
+ categorical_features : list [str ],
423
+ numeric_features : list [str ],
424
+ low_cardinality_features : list [str ],
425
+ high_cardinality_features : list [str ],
426
+ feature_encoder_map : dict [str , str ],
427
427
) -> None :
428
428
"""Log information about the features."""
429
429
self .logger .info (
0 commit comments