17
17
# along with this program. If not, see <http://www.gnu.org/licenses/>.
18
18
# -----------------------------------------------------------------------------.
19
19
"""DISDRODB reader for ENPC PARSIVEL2 raw text data."""
20
+ import zipfile
21
+
20
22
import numpy as np
21
23
import pandas as pd
22
- import zipfile
23
- from disdrodb .utils .logger import log_error
24
+
24
25
from disdrodb .l0 .l0_reader import is_documented_by , reader_generic_docstring
25
26
from disdrodb .l0 .l0a_processing import read_raw_text_file
27
+ from disdrodb .utils .logger import log_error
26
28
27
29
28
30
@is_documented_by (reader_generic_docstring )
@@ -31,13 +33,14 @@ def reader(
31
33
logger = None ,
32
34
):
33
35
"""Reader."""
36
+
34
37
##------------------------------------------------------------------------.
35
38
#### Define function to read each txt file inside each daily zip file
36
- def read_txt_file (file , filename ):
39
+ def read_txt_file (file , filename ):
37
40
##------------------------------------------------------------------------.
38
41
#### Define column names
39
42
column_names = ["TO_PARSE" ]
40
-
43
+
41
44
##------------------------------------------------------------------------.
42
45
#### Define reader options
43
46
reader_kwargs = {}
@@ -62,43 +65,43 @@ def read_txt_file(file, filename):
62
65
# '-NaN', '-nan', '1.#IND', '1.#QNAN', '<NA>', 'N/A',
63
66
# 'NA', 'NULL', 'NaN', 'n/a', 'nan', 'null'
64
67
reader_kwargs ["na_values" ] = ["na" , "" , "error" ]
65
-
66
- ##------------------------------------------------------------------------.
68
+
69
+ ##------------------------------------------------------------------------.
67
70
#### Read the data
68
71
df = read_raw_text_file (
69
72
filepath = f ,
70
73
column_names = column_names ,
71
74
reader_kwargs = reader_kwargs ,
72
75
logger = logger ,
73
76
)
74
-
77
+
75
78
##------------------------------------------------------------------------.
76
79
#### Adapt the dataframe to adhere to DISDRODB L0 standards
77
80
# Create ID and Value columns
78
81
df = df ["TO_PARSE" ].str .split (":" , expand = True , n = 1 )
79
- df .columns = ["ID" , "Value" ]
80
-
82
+ df .columns = ["ID" , "Value" ]
83
+
81
84
# Select only rows with values
82
85
df = df [df ["Value" ].apply (lambda x : x is not None )]
83
-
86
+
84
87
# Drop rows with invalid IDs
85
88
valid_id_str = np .char .rjust (np .arange (0 , 94 ).astype (str ), width = 2 , fillchar = "0" )
86
89
df = df [df ["ID" ].astype (str ).isin (valid_id_str )]
87
-
90
+
88
91
# Create the dataframe with each row corresponding to a timestep
89
92
# - Group rows based on when ID values restart
90
93
groups = df .groupby ((df ["ID" ].astype (int ).diff () <= 0 ).cumsum ())
91
-
94
+
92
95
# Reshape the dataframe
93
96
group_dfs = []
94
97
for _ , group in groups :
95
98
group_df = group .set_index ("ID" ).T
96
99
group_dfs .append (group_df )
97
-
100
+
98
101
# Merge each timestep dataframe
99
102
# --> Missing columns are infilled by NaN
100
103
df = pd .concat (group_dfs , axis = 0 )
101
-
104
+
102
105
# Assign column names
103
106
column_dict = {
104
107
"01" : "rainfall_rate_32bit" ,
@@ -132,39 +135,39 @@ def read_txt_file(file, filename):
132
135
"30" : "rainfall_rate_16_bit_30" ,
133
136
"31" : "rainfall_rate_16_bit_1200" ,
134
137
"32" : "rainfall_accumulated_16bit" ,
135
- "34" : "rain_kinetic_energy" ,
138
+ "34" : "rain_kinetic_energy" ,
136
139
"35" : "snowfall_rate" ,
137
140
"90" : "raw_drop_concentration" ,
138
141
"91" : "raw_drop_average_velocity" ,
139
142
"93" : "raw_drop_number" ,
140
143
}
141
-
144
+
142
145
df = df .rename (column_dict , axis = 1 )
143
-
146
+
144
147
# Keep only columns defined in the dictionary
145
148
df = df [list (column_dict .values ())]
146
-
147
- # Define datetime "time" column from filename
148
- datetime_str = ' ' .join (filename .replace (' .txt' , '' ).split ('_' )[- 6 :])
149
- df ["time" ] = pd .to_datetime (datetime_str , format = ' %Y %m %d %H %M %S' )
150
-
149
+
150
+ # Define datetime "time" column from filename
151
+ datetime_str = " " .join (filename .replace (" .txt" , "" ).split ("_" )[- 6 :])
152
+ df ["time" ] = pd .to_datetime (datetime_str , format = " %Y %m %d %H %M %S" )
153
+
151
154
# # Drop columns not agreeing with DISDRODB L0 standards
152
155
# columns_to_drop = [
153
156
# "sensor_date",
154
157
# "sensor_time",
155
- # "firmware_iop",
156
- # "firmware_dsp",
157
- # "sensor_serial_number",
158
- # "station_name",
159
- # "station_number",
158
+ # "firmware_iop",
159
+ # "firmware_dsp",
160
+ # "sensor_serial_number",
161
+ # "station_name",
162
+ # "station_number",
160
163
# ]
161
164
# df = df.drop(columns=columns_to_drop)
162
- return df
163
-
164
- #---------------------------------------------------------------------.
165
+ return df
166
+
167
+ # ---------------------------------------------------------------------.
165
168
#### Iterate over all files (aka timesteps) in the daily zip archive
166
169
# - Each file contains a single timestep!
167
- list_df = []
170
+ list_df = []
168
171
with zipfile .ZipFile (filepath , "r" ) as zip_ref :
169
172
filenames = sorted (zip_ref .namelist ())
170
173
for filename in filenames :
@@ -174,13 +177,12 @@ def read_txt_file(file, filename):
174
177
try :
175
178
df = read_txt_file (file = f , filename = filename )
176
179
list_df .append (df )
177
- except Exception as e :
178
- msg = f"An error occured while reading { filename } . The error is: { e } ."
180
+ except Exception as e :
181
+ msg = f"An error occured while reading { filename } . The error is: { e } ."
179
182
log_error (logger = logger , msg = msg , verbose = True )
180
-
183
+
181
184
# Concatenate all dataframes into a single one
182
- df = pd .concat (list_df )
183
-
184
- #---------------------------------------------------------------------.
185
- return df
186
-
185
+ df = pd .concat (list_df )
186
+
187
+ # ---------------------------------------------------------------------.
188
+ return df
0 commit comments