1- import pickle
21from pathlib import Path
32
43import pandas as pd
1615
1716
1817products = {
19- "column_labels" : BLD / "data" / "column_labels.pkl " ,
20- "value_labels" : BLD / "data" / "value_labels.pkl " ,
21- "enucs_raw " : BLD / "data" / "ENUSC_raw.pkl " ,
18+ "column_labels" : BLD / "data" / "column_labels.csv " ,
19+ "value_labels" : BLD / "data" / "value_labels.csv " ,
20+ "enusc_raw " : BLD / "data" / "ENUSC_raw.csv " ,
2221}
2322
2423products_filter = {
@@ -37,32 +36,36 @@ def task_create_data_meta(
3736
3837 df , meta = pyreadstat .read_sav (sav_data )
3938 df_pd = pd .DataFrame (df )
40- df_pd .to_pickle (products ["ENUSC_raw " ])
39+ df_pd .to_csv (products ["enusc_raw " ])
4140
42- with Path .open (products ["value_labels" ], "wb" ) as file :
43- pickle .dump (obj = meta .value_labels , file = file )
41+ with Path .open (products ["value_labels" ], "w" ) as file :
42+ # Convert value_labels (dictionary) into a DataFrame for saving as CSV
43+ value_labels_df = pd .DataFrame (meta .value_labels )
44+ value_labels_df .to_csv (file , index = False )
4445
45- with Path .open (products ["column_labels" ], "wb" ) as file :
46+ # Save column_labels as a CSV
47+ with Path .open (products ["column_labels" ], "w" ) as file :
48+ # Convert column_labels to DataFrame directly
4649 column_labels_df = pd .DataFrame (meta .column_labels )
47- pickle . dump ( obj = column_labels_df , file = file )
50+ column_labels_df . to_csv ( file , index = False )
4851
4952
5053def task_create_labels_var_relation (
51- raw_enusc = BLD / "data" / "ENUSC_raw.pkl " ,
52- column_labels = BLD / "column_labels.pkl " ,
54+ raw_enusc = BLD / "data" / "ENUSC_raw.csv " ,
55+ column_labels = BLD / "data" / " column_labels.csv " ,
5356 produces = BLD / "data" / "variable_labels.pkl" ,
5457):
55- raw = pd .read_pickle (raw_enusc )
58+ raw = pd .read_csv (raw_enusc )
5659 column_names = raw .columns .tolist ()
5760 column_names = pd .DataFrame (column_names )
58- column_labels = pd .read_pickle (column_labels )
61+ column_labels = pd .read_csv (column_labels )
5962 labels_var = pd .concat ([column_names , column_labels ], axis = 1 )
6063 labels_var .columns = ["variable_name" , "label" ]
6164 labels_var .to_pickle (produces )
6265
6366
6467def task_filter_variables_and_labels (
65- raw = BLD / "data" / "ENUSC_raw.pkl " ,
68+ raw = BLD / "data" / "ENUSC_raw.csv " ,
6669 labels_raw = BLD / "data" / "variable_labels.pkl" ,
6770 produces = products_filter ,
6871):
0 commit comments