22import statsmodels .api as sm
33import statsmodels .formula .api as smf
44
5+ from project_mbb .parameters import info_sources_mapping , perception_change_mapping
6+
57
68def regression_perception_info (enusc_clean ):
79 enusc_model_pre = _set_category_values (enusc_clean )
@@ -12,17 +14,23 @@ def regression_perception_info(enusc_clean):
1214
1315
def _set_category_values(enusc_clean):
    """Convert the two categorical survey columns to their integer category codes.

    The categories of both columns are validated against the project mappings
    before any conversion happens.

    Args:
        enusc_clean: DataFrame holding the categorical columns
            "crime_increase_perception_commune" and "crime_info_source_commune".

    Returns:
        A new DataFrame with the same two column names, each holding the
        ``.cat.codes`` of the corresponding input column.

    Raises:
        ValueError: Propagated from the category validation helpers.
    """
    _fail_if_invalid_categories_perception(enusc_clean, perception_change_mapping)
    _fail_if_invalid_categories_source(enusc_clean, info_sources_mapping)

    enusc_model_pre = pd.DataFrame()
    enusc_model_pre["crime_increase_perception_commune"] = enusc_clean[
        "crime_increase_perception_commune"
    ].cat.codes
    enusc_model_pre["crime_info_source_commune"] = enusc_clean[
        "crime_info_source_commune"
    ].cat.codes
    return enusc_model_pre
2328
2429
2530def _set_binary_for_info_source (enusc_model ):
31+ _fail_if_invalid_category_values (enusc_model , "crime_increase_perception_commune" )
32+ _fail_if_invalid_category_values (enusc_model , "crime_info_source_commune" )
33+
2634 tech_based_codes = {2 , 5 , 6 }
2735
2836 enusc_model ["tech_based" ] = enusc_model ["crime_info_source_commune" ].apply (
@@ -32,6 +40,8 @@ def _set_binary_for_info_source(enusc_model):
3240
3341
3442def _drop_missing (enusc_model ):
43+ _fail_if_invalid_tech_based_values (enusc_model )
44+
3545 enusc_model_clean = enusc_model [
3646 (enusc_model ["crime_increase_perception_commune" ] != - 1 )
3747 & (enusc_model ["crime_info_source_commune" ] != - 1 )
@@ -40,9 +50,136 @@ def _drop_missing(enusc_model):
4050
4151
def _run_logistic_regression(enusc_model_clean):
    """Fit a multinomial logit of crime perception on the ``tech_based`` indicator.

    Args:
        enusc_model_clean: DataFrame with the columns
            "crime_increase_perception_commune" and "tech_based".

    Returns:
        The fitted result object returned by ``smf.mnlogit(...).fit()``.

    Raises:
        ValueError: If missing values (-1 or NaN) remain in the category
            columns, as reported by ``_fail_if_missing_values_after_drop``.
    """
    _fail_if_missing_values_after_drop(enusc_model_clean)

    # The formula interface builds the design matrix (including the intercept)
    # from ``data`` itself, so no separate regressor matrix is prepared here.
    model = smf.mnlogit(
        "crime_increase_perception_commune ~ tech_based", data=enusc_model_clean
    ).fit()
    return model
61+
62+
63+ # Error Handling
64+
65+
66+ def _fail_if_invalid_categories_perception (enusc_clean , perception_change_mapping ):
67+ """Raises ValueError if the categories in 'crime_increase_perception_commune'
68+
69+ are missing.
70+ """
71+ ignored_cat = 99
72+ perception_categories = set (
73+ enusc_clean ["crime_increase_perception_commune" ].cat .categories
74+ )
75+ expected_categories = set (perception_change_mapping .values ())
76+
77+ perception_categories = {
78+ cat
79+ for cat in perception_categories
80+ if cat != ignored_cat and pd .isna (cat ) is False
81+ }
82+ expected_categories = {
83+ cat
84+ for cat in expected_categories
85+ if cat != ignored_cat and pd .isna (cat ) is False
86+ }
87+
88+ missing_categories = expected_categories - perception_categories
89+ extra_categories = perception_categories - expected_categories
90+
91+ if missing_categories or extra_categories :
92+ error_msg = (
93+ f"Invalid categories for 'crime_increase_perception_commune':\n "
94+ f"Missing categories: { missing_categories } \n "
95+ f"Extra categories: { extra_categories } "
96+ )
97+ raise ValueError (error_msg )
98+
99+
100+ def _fail_if_invalid_categories_source (enusc_clean , info_source_mapping ):
101+ """Raises ValueError if the categories in 'crime_info_source_commune'
102+
103+ are missing.
104+ """
105+ ignored_cat = 99
106+
107+ source_categories = set (enusc_clean ["crime_info_source_commune" ].cat .categories )
108+ expected_categories = set (info_sources_mapping .values ())
109+
110+ source_categories = {
111+ cat for cat in source_categories if cat != ignored_cat and pd .isna (cat ) is False
112+ }
113+ expected_categories = {
114+ cat
115+ for cat in expected_categories
116+ if cat != ignored_cat and pd .isna (cat ) is False
117+ }
118+
119+ missing_categories = expected_categories - source_categories
120+ extra_categories = source_categories - expected_categories
121+
122+ if missing_categories or extra_categories :
123+ error_msg = (
124+ f"Invalid categories for 'crime_info_source_commune':\n "
125+ f"Missing categories: { missing_categories } \n "
126+ f"Extra categories: { extra_categories } "
127+ )
128+ raise ValueError (error_msg )
129+
130+
131+ def _fail_if_invalid_category_values (enusc_model , column_name ):
132+ """Raises an error if any value in the specified column is
133+
134+ outside the valid range.
135+ """
136+ if column_name == "crime_increase_perception_commune" :
137+ valid_range = set (range (5 )) | {- 1 } # Expecting 0 to 4
138+ elif column_name == "crime_info_source_commune" :
139+ valid_range = set (range (11 )) | {- 1 } # Expecting 0 to 9
140+
141+ column_values = enusc_model [column_name ]
142+ invalid_values = column_values [~ column_values .isin (valid_range )].unique ()
143+
144+ if len (invalid_values ) > 0 :
145+ error_msg = (
146+ f"Invalid category values in '{ column_name } ': { invalid_values } ."
147+ f"Expected values within { valid_range } ."
148+ )
149+ raise ValueError (error_msg )
150+
151+
152+ def _fail_if_invalid_tech_based_values (enusc_model ):
153+ """Raises an error if 'tech_based' column contains values other than -1, 1 or 0."""
154+ valid_values = {0 , 1 , - 1 }
155+ invalid_values = enusc_model [~ enusc_model ["tech_based" ].isin (valid_values )][
156+ "tech_based"
157+ ].unique ()
158+
159+ if len (invalid_values ) > 0 :
160+ error_msg = (
161+ f"Invalid values in 'tech_based' column: { invalid_values } . "
162+ " Expected values are 0 and 1."
163+ )
164+ raise ValueError (error_msg )
165+
166+
167+ def _fail_if_missing_values_after_drop (enusc_model_clean ):
168+ """Raises an error if there are still missing values (i.e., -1 or NaN)
169+
170+ in the specified columns.
171+ """
172+ missing_values = enusc_model_clean [
173+ (enusc_model_clean ["crime_increase_perception_commune" ] == - 1 )
174+ | (enusc_model_clean ["crime_info_source_commune" ] == - 1 )
175+ | enusc_model_clean ["crime_increase_perception_commune" ].isna ()
176+ | enusc_model_clean ["crime_info_source_commune" ].isna ()
177+ ]
178+
179+ if not missing_values .empty :
180+ error_msg = (
181+ f"Missing values after dropping: { missing_values .shape [0 ]} "
182+ "rows contain missing values in 'crime_increase_perception_commune'"
183+ " or 'crime_info_source_commune'."
184+ )
185+ raise ValueError (error_msg )
0 commit comments