@@ -19,18 +19,19 @@ def clean_enusc(raw_enusc):
1919 enusc_filtered = _filter_enusc (raw_enusc , relevant_var )
2020 enusc_renamed = _rename_enusc (enusc_filtered , rename_mapping )
2121 enusc_mapped = _map_categories (enusc_renamed )
22- # here check for no objects present
2322 enusc_filled = _fill_missing (enusc_mapped )
2423 enusc_dtypes = _set_data_types_not_mapped_var (enusc_filled )
2524 return enusc_dtypes
2625
2726
def _filter_enusc(raw_enusc, relevant_var):
    """Return the part of ``raw_enusc`` selected by ``relevant_var``.

    ``relevant_var`` is checked with ``_fail_if_not_list`` before it is used
    to index ``raw_enusc``.
    """
    _fail_if_not_list(relevant_var)
    return raw_enusc[relevant_var]
3131
3232
3333def _rename_enusc (enusc_filtered , rename_mapping ):
34+ _fail_if_not_equal_length (enusc_filtered , rename_mapping )
3435 enusc_renamed = enusc_filtered .copy ()
3536 enusc_renamed .columns = enusc_renamed .columns .str .lower ()
3637 enusc_renamed = enusc_renamed .rename (columns = rename_mapping )
@@ -52,7 +53,12 @@ def _map_categories(enusc_renamed):
5253
5354
5455def _fill_missing (enusc_mapped ):
56+ _fail_if_not_dataframe (enusc_mapped )
57+ _fail_if_missing_columns (enusc_mapped , categories , "categories" )
58+ _fail_if_missing_columns (enusc_mapped , map_category , "map_category" )
59+
5560 replacements = {99 : pd .NA , 77 : "Other" , 88 : "Don't know" , 85 : "Doesn't apply" }
61+
5662 enusc_filling = enusc_mapped .copy ()
5763 for column in categories :
5864 enusc_filling [column ] = enusc_filling [column ].astype (pd .Int8Dtype ())
@@ -61,11 +67,7 @@ def _fill_missing(enusc_mapped):
6167 )
6268
6369 if enusc_mapped [column ].dtype != "object" :
64- enusc_filling [column ] = (
65- enusc_filling [column ]
66- # .astype(pd.Int8Dtype()) esto se puede borrar
67- .astype (pd .CategoricalDtype ())
68- )
70+ enusc_filling [column ] = enusc_filling [column ].astype (pd .CategoricalDtype ())
6971 enusc_filling [column ] = enusc_filling [column ].cat .rename_categories (
7072 lambda x : replacements .get (x , x )
7173 )
@@ -86,6 +88,11 @@ def _fill_missing(enusc_mapped):
8688
8789
8890def _set_data_types_not_mapped_var (enusc_filled ):
91+ _fail_if_columns_not_found (enusc_filled , floats )
92+ _fail_if_columns_not_found (enusc_filled , integers )
93+ _fail_if_columns_not_found (enusc_filled , categories )
94+ _fail_if_columns_not_found (enusc_filled , strings )
95+
8996 enusc_dtypes = enusc_filled .copy ()
9097 for value in floats :
9198 enusc_dtypes [value ] = enusc_dtypes [value ].astype (pd .Float64Dtype ())
@@ -96,3 +103,61 @@ def _set_data_types_not_mapped_var(enusc_filled):
96103 for ent in strings :
97104 enusc_dtypes [ent ] = enusc_dtypes [ent ].astype (str )
98105 return enusc_dtypes
106+
107+
108+ # Error Handling
109+
110+
111+ def _fail_if_not_list (relevant_var ):
112+ """Raise TypeError if relevant_var for filtering is not a list."""
113+ if not isinstance (relevant_var , list ):
114+ error_msg = f"Expected a list, but got { type (relevant_var ).__name__ } "
115+ raise TypeError (error_msg )
116+
117+
class ShapeError(Exception):
    """Signal that a table's width disagrees with its rename mapping.

    Raised by ``_fail_if_not_equal_length``.
    """
120+
121+
122+ def _fail_if_not_equal_length (enusc_filtered , rename_mapping ):
123+ """Raise ShapeError if data and renaming dictionary have unequal width.
124+
125+ The number of columns in `enusc_filtered` must match the length of
126+ `rename_mapping`.
127+ """
128+ if enusc_filtered .shape [1 ] != len (rename_mapping ):
129+ error_msg = (
130+ f"Lists have unequal widths: "
131+ f"{ enusc_filtered .shape [1 ]} vs { len (rename_mapping )} "
132+ )
133+ raise ShapeError (error_msg )
134+
135+
class MissingError(Exception):
    """Signal that expected columns are absent from a DataFrame."""
138+
139+
140+ def _fail_if_not_dataframe (enusc_mapped ):
141+ """Raise an error if enusc_mapped is not a Pandas DataFrame."""
142+ if not isinstance (enusc_mapped , pd .DataFrame ):
143+ error_msg = f"Expected a DataFrame, but got { type (enusc_mapped ).__name__ } "
144+ raise TypeError (error_msg )
145+
146+
147+ def _fail_if_missing_columns (enusc_mapped , column_list , list_name ):
148+ """Raise an error if any column in column_list is missing from enusc_mapped."""
149+ missing_columns = [col for col in column_list if col not in enusc_mapped .columns ]
150+ if missing_columns :
151+ error_msg = (
152+ f"The following columns from '{ list_name } '"
153+ f"are missing in the DataFrame: { missing_columns } "
154+ )
155+ raise MissingError (error_msg )
156+
157+
158+ def _fail_if_columns_not_found (enusc_filled , column_list ):
159+ """Raise an error if a column in the list is not found in the DataFrame."""
160+ missing_columns = [col for col in column_list if col not in enusc_filled .columns ]
161+ if missing_columns :
162+ error_msg = f"Columns not found in DataFrame: { ', ' .join (missing_columns )} "
163+ raise MissingError (error_msg )
0 commit comments