Skip to content

Commit 1c6bbc4

Browse files
Do tests for labels mgt.
1 parent 035f247 commit 1c6bbc4

File tree

1 file changed

+110
-14
lines changed

1 file changed

+110
-14
lines changed
Lines changed: 110 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -1,26 +1,122 @@
1-
import numpy as np
1+
import pandas as pd
22
import pytest
33

4+
from project_mbb.config import BLD
45
from project_mbb.data_management.clean_labels import (
5-
create_labels_var_relation,
66
_concat_labels_var,
77
_filter_labels_var,
88
_rename_variables_eng,
99
)
1010

11-
def assert_categorical_equal(left, right):
12-
assert_series_equal(pd.Series(left), pd.Series(right))
1311

14-
def test_concat_labels_var(data):
15-
data_test = _concat_labels_var(data)
12+
@pytest.fixture
def enusc_raw():
    """Return the contents of ``ENUSC_raw.csv`` (under ``BLD / "data"``) as a DataFrame."""
    return pd.read_csv(BLD / "data" / "ENUSC_raw.csv")
1617

17-
column = ["variable_name", "label"]
18-
variable_name ={"rph_ID, "Kish", "Conglomerado", }
1918

20-
data_test["rph_ID"] = "Identificador de persona"
21-
data_test["Kish"] = "Informante Kish"
22-
data_test["Conglomerado"] = "Pseudoconglomerado"
23-
data_test["HUR"] = "Hurto consumado"
24-
19+
@pytest.fixture
def labels_raw():
    """Return the contents of ``column_labels.csv`` (under ``BLD / "data"``) as a DataFrame."""
    return pd.read_csv(BLD / "data" / "column_labels.csv")
2524

26-
25+
26+
@pytest.fixture
def data_test():
    """Return a small in-memory table of five variable names and their labels.

    The frame has two columns, ``variable_name`` and ``label``, and is used by
    the tests in this module in place of the files stored under ``BLD``.
    """
    names = ["rph_id", "idhogar", "enc_idr", "enc_region", "enc_rpc"]
    labels = [
        "Identificador de persona",
        "Identificador de hogar",
        "Folio de la vivienda",
        "Región",
        "Región, provincia, comuna",
    ]
    return pd.DataFrame({"variable_name": names, "label": labels})
39+
40+
41+
def test_concat_labels_var(enusc_raw, labels_raw):
    """Check the table produced by ``_concat_labels_var``.

    The result must expose the ``variable_name`` and ``label`` columns, and
    three spot-checked positions must hold the expected name/label pair.

    Args:
        enusc_raw: DataFrame supplied by the ``enusc_raw`` fixture.
        labels_raw: DataFrame supplied by the ``labels_raw`` fixture.
    """
    data_test = _concat_labels_var(enusc_raw, labels_raw)

    assert (
        "variable_name" in data_test.columns
    ), "Column 'variable_name' not found in DataFrame"
    assert "label" in data_test.columns, "Column 'label' not found in DataFrame"

    # Positions are those of the original (unfiltered) table, so this test can
    # keep being used if more variables are later included in the filtering.
    expected_rows = [
        (0, "rph_ID", "Identificador de persona"),
        (1247, "HUR", "Hurto consumado"),
        (1300, "Conglomerado", "Pseudoconglomerado"),
    ]

    # One assertion per row and field: a failure then names the exact position
    # and value that mismatched instead of a bare ``False`` from ``all([...])``.
    for position, variable_name, label in expected_rows:
        found_name = data_test["variable_name"].iloc[position]
        found_label = data_test["label"].iloc[position]
        assert (
            found_name == variable_name
        ), f"Row {position}: expected variable {variable_name!r}, got {found_name!r}"
        assert (
            found_label == label
        ), f"Row {position}: expected label {label!r}, got {found_label!r}"
61+
62+
63+
def test_filter_labels_var(data_test):
    """Check that ``_filter_labels_var`` returns only the requested rows.

    Args:
        data_test: DataFrame supplied by the ``data_test`` fixture.
    """
    var_filt = ["rph_id", "enc_region"]
    wanted = pd.DataFrame(
        {
            "variable_name": ["rph_id", "enc_region"],
            "label": [
                "Identificador de persona",
                "Región",
            ],
        }
    )

    filtered = _filter_labels_var(data_test, var_filt)

    # Both frames are compared on a fresh 0..n-1 index, so the index labels
    # carried by the filtered result do not take part in the comparison.
    pd.testing.assert_frame_equal(
        filtered.reset_index(drop=True),
        wanted.reset_index(drop=True),
        check_like=False,
    )
82+
83+
84+
def test_rename_variable_eng(data_test):
    """Check that ``_rename_variables_eng`` applies the given name mapping.

    Every entry of ``variable_name`` is expected to be replaced by its mapped
    English name while the ``label`` column stays as it was.

    Args:
        data_test: DataFrame supplied by the ``data_test`` fixture.
    """
    english_names = [
        "person_id",
        "household_id",
        "housing_folio",
        "region",
        "commune",
    ]
    spanish_labels = [
        "Identificador de persona",
        "Identificador de hogar",
        "Folio de la vivienda",
        "Región",
        "Región, provincia, comuna",
    ]
    expected_df = pd.DataFrame(
        {"variable_name": english_names, "label": spanish_labels}
    )

    rename_mapping = {
        "rph_id": "person_id",
        "idhogar": "household_id",
        "enc_idr": "housing_folio",
        "enc_region": "region",
        "enc_rpc": "commune",
    }
    renamed = _rename_variables_eng(data_test, rename_mapping)

    # check_like=True: the order of index and columns is ignored.
    pd.testing.assert_frame_equal(renamed, expected_df, check_like=True)
115+
116+
117+
def test_task_clean_labels_shape():
    """Check that the pickled ``variable_labels_clean.pkl`` table has shape (66, 2)."""
    labels_clean = pd.read_pickle(BLD / "data" / "variable_labels_clean.pkl")
    assert (66, 2) == labels_clean.shape

0 commit comments

Comments
 (0)