Skip to content

Commit ac103f0

Browse files
Create rename and filter enusc function and start mapping in ntb.
1 parent 9c461f5 commit ac103f0

File tree

10 files changed

+43518
-742
lines changed

10 files changed

+43518
-742
lines changed

.DS_Store

0 Bytes
Binary file not shown.

src/project_mbb/.DS_Store

0 Bytes
Binary file not shown.

src/project_mbb/config.py

Lines changed: 0 additions & 91 deletions
Original file line numberDiff line numberDiff line change
@@ -15,94 +15,3 @@
1515

1616
DOCUMENTS = ROOT.joinpath("documents").resolve()
1717

18-
REL_VAR = [
19-
"0" "enc_rpc",
20-
"rph_ID",
21-
"idhogar",
22-
"enc_idr",
23-
"enc_region",
24-
"rph_nivel",
25-
"rph_ciuo",
26-
"rph_nse",
27-
"rph_parentesco",
28-
"rph_edad",
29-
"rph_sexo",
30-
"rph_idgen",
31-
"Kish",
32-
"Hogar_Kish",
33-
"ANTIG_SECTOR",
34-
"Fact_Pers_Com",
35-
"Fact_Pers_Reg",
36-
"Fact_Pers_Regional_102",
37-
"Fact_Hog_Com",
38-
"Fact_Hog_Reg",
39-
"Fact_Hog_Regional_102",
40-
"VarStrat",
41-
"Conglomerado",
42-
"P_AUMENTO_PAIS",
43-
"P_AUMENTO_COM",
44-
"P_AUMENTO_BARRIO",
45-
"P_FUENTE_INFO_PAIS_1",
46-
"P_FUENTE_INFO_COM_1",
47-
"P_INSEG_LUGARES_1",
48-
"P_INSEG_LUGARES_2",
49-
"P_INSEG_OSCURO_1",
50-
"P_INSEG_DIA_1",
51-
"P_INSEG_OSCURO_2",
52-
"P_INSEG_DIA_2",
53-
"P_DESORDENES_3",
54-
"P_INCIVILIDADES_6",
55-
"P_EXPOS_DELITO",
56-
"P_DELITO_PRONOSTICO__1",
57-
"P_DELITO_PRONOSTICO__2",
58-
"P_DELITO_PRONOSTICO__3",
59-
"P_DELITO_PRONOSTICO__5",
60-
"P_DELITO_PRONOSTICO__6",
61-
"P_DELITO_PRONOSTICO__77",
62-
"P_DELITO_PRONOSTICO__88",
63-
"P_DELITO_PRONOSTICO__99",
64-
"P_MOD_ACTIVIDADES_1",
65-
"P_MOD_ACTIVIDADES_3",
66-
"P_MOD_ACTIVIDADES_8",
67-
"P_MOD_ACTIVIDADES_7",
68-
"P_MOD_ACTIVIDADES_11",
69-
"EV_CONOCE_CCH",
70-
"EV_CONFIA_CCH",
71-
"EV_CONOCE_PDI",
72-
"EV_CONFIA_PDI",
73-
"EV_CONOCE_FMP",
74-
"EV_CONFIA_FMP",
75-
"HUR_QUIEN__0",
76-
"HUR_QUIEN__1",
77-
"HUR_QUIEN__2",
78-
"HUR_QUIEN__3",
79-
"HUR_QUIEN__4",
80-
"HUR_QUIEN__5",
81-
"HUR_QUIEN__6",
82-
"HUR_QUIEN__7",
83-
"HUR_MES",
84-
"HUR_DONDE_ESP",
85-
"HUR_DONDE_COMUNA",
86-
"HUR_DONDE_REGION",
87-
"HUR_DENUNCIA",
88-
"HUR_DENUNCIA_COMO",
89-
"HUR_SATISF_DEN",
90-
"HUR_ACCION_POLICIAL",
91-
"HUR_CONTACTO_MP",
92-
"HUR_MOTIV_NO_DEN",
93-
"SCREEN_ROB_HUR",
94-
"HUR_DENUNCIAS",
95-
"PAD",
96-
"PADC",
97-
"PADB",
98-
"PCOS",
99-
"VH_DV",
100-
"VP_DV",
101-
"DEN_VHDV",
102-
"COSC_DV",
103-
"DEN_ROBOS",
104-
"VH_ROBOS",
105-
"RDV",
106-
"RDV_INTENTO",
107-
"HUR",
108-
]
165 Bytes
Binary file not shown.

src/project_mbb/data_management/filter_clean.py

Lines changed: 28 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
11
import pandas as pd
22

3-
from project_mbb.config import REL_VAR
3+
4+
from project_mbb.parameters import rename_mapping, region_mapping, relevant_var
5+
46

57
pd.options.mode.copy_on_write = True
68
pd.options.future.infer_string = True
@@ -27,20 +29,33 @@ def _concat_labels_var(raw_enusc, labels_raw):
2729

2830

2931
def _filter_labels_var(var_labels):
30-
var_labels_filt = var_labels[var_labels["variable_name"].isin(REL_VAR)]
32+
var_labels_filt = var_labels[var_labels["variable_name"].isin(relevant_var)]
3133
return var_labels_filt
3234

3335

34-
# def _no_ready_clean_enusc(
35-
# raw=BLD / "data" / "ENUSC_raw.csv",
36-
# labels_raw=BLD / "data" / "variable_labels.pkl",
37-
# produces=products_filter,
38-
# ):
39-
# enusc_filtered = _filter_relevant_variables(raw)
40-
# enusc_filtered.to_pickle(products_filter["enusc_filt"])
4136

4237

43-
# def _no_ready_filter_relevant_variables(raw):
44-
# enusc = pd.read_csv(raw)
45-
# enusc_filtered = enusc[REL_VAR]
46-
# return enusc_filtered
38+
39+
def clean_enusc(raw_enusc):
40+
enusc_filtered = _filter_enusc(raw_enusc)
41+
enusc_category = _map_category_region(enusc_filtered)
42+
#enusc_renamed = _rename_enusc(enusc_filtered)
43+
return enusc_category
44+
45+
def _filter_enusc(raw_enusc):
46+
enusc = pd.read_csv(raw_enusc)
47+
enusc_filtered = enusc[relevant_var]
48+
return enusc_filtered
49+
50+
51+
def _rename_enusc(enusc_lower):
52+
enusc_lower.columns = enusc_lower.columns.str.lower()
53+
enusc_renamed = enusc_lower.rename(columns=rename_mapping)
54+
return enusc_renamed
55+
56+
def _map_category_region(enusc_filtered):
57+
58+
return enusc_filtered
59+
60+
61+

src/project_mbb/data_management/task_data_management.py

Lines changed: 10 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,9 @@
44
import pyreadstat
55

66
from project_mbb.config import BLD, DATA
7-
from project_mbb.data_management.filter_clean import create_labels_var_relation
7+
from project_mbb.data_management.filter_clean import create_labels_var_relation, clean_enusc
8+
9+
810

911
pd.options.mode.copy_on_write = True
1012
pd.options.future.infer_string = True
@@ -22,7 +24,7 @@
2224
"labels_filt": BLD / "data" / "variable_labels_filtered.pkl",
2325
}
2426

25-
27+
"""
2628
def task_create_data_meta(
2729
raw_data=DATA / "base-usuario-20-enusc-2023.sav", produces=products
2830
):
@@ -40,6 +42,7 @@ def task_create_data_meta(
4042
# Convert column_labels to DataFrame directly
4143
column_labels_df = pd.DataFrame(meta.column_labels)
4244
column_labels_df.to_csv(file, index=False)
45+
"""
4346

4447

4548
def task_clean_labels(
@@ -50,17 +53,10 @@ def task_clean_labels(
5053
labels_var = create_labels_var_relation(raw, labels_raw)
5154
labels_var.to_pickle(produces)
5255

56+
def task_clean_enusc(raw=BLD / "data" / "ENUSC_raw.csv",
57+
labels_raw=BLD / "data" / "column_labels.csv",
58+
produces=BLD / "data" / "enusc_test.pkl",):
59+
data_test = clean_enusc(raw)
60+
data_test.to_pickle(produces)
5361

54-
# def no_ready_task_clean_data(raw=BLD / "data" / "ENUSC_raw.csv",
55-
# labels_raw=BLD / "data" / "variable_labels.pkl",
56-
# produces=products_filter):
5762

58-
# def no_ready_filter_variables_and_labels(
59-
# raw=BLD / "data" / "ENUSC_raw.csv",
60-
# labels_raw=BLD / "data" / "variable_labels.pkl",
61-
# produces=products_filter,
62-
# ):
63-
# enusc_filtered = filter_relevant_variables(raw)
64-
# enusc_filtered.to_pickle(products_filter["enusc_filt"])
65-
# var_labels = filter_labels_var(labels_raw)
66-
# var_labels.to_pickle(products_filter["labels_filt"])

0 commit comments

Comments
 (0)