-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathnc_single_band.py
256 lines (203 loc) · 8.59 KB
/
nc_single_band.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
""" This module contains functionality to produce a NetCDF-4 file that should
contain a single science variable and it's associated projected
coordinates. The format of each file should be:
Groups:
- All non-science variables should be in the root group of the NetCDF-4
dataset.
- The science variable should be in the same location within the output
dataset as the original input variable was within its dataset.
Variables:
- science variable.
- crs (named according to the CF Conventions).
- x (or lon if geographic).
- y (or lat if geographic).
Dimensions:
- x (or lon if geographic).
- y (or lat if geographic).
"""
from typing import Dict, Tuple
import numpy as np
from netCDF4 import Dataset
from pyresample.geometry import AreaDefinition
DIMENSION_METADATA = {
'lat': {
'long_name': 'latitude',
'standard_name': 'latitude',
'units': 'degrees_north',
},
'lon': {
'long_name': 'longitude',
'standard_name': 'longitude',
'units': 'degrees_east',
},
'x': {
'long_name': 'x coordinate of projection',
'standard_name': 'projection_x_coordinate',
'units': 'm',
},
'y': {
'long_name': 'y coordinate of projection',
'standard_name': 'projection_y_coordinate',
'units': 'm',
},
}
HARMONY_TARGET = 'harmony_message_target'
def write_single_band_output(
target_area: AreaDefinition,
reprojected_data: np.ndarray,
variable_name: str,
variable_output_path: str,
reprojection_cache: Dict,
attributes: Dict,
) -> None:
"""The main interface for this module. Each single band output file
will contain the following properties:
- A reprojected, 2-dimensional science variable.
- Projected x and y dimensions.
- A 1-dimensional variable for each of the associated dimensions. The
science variable should use these as its dimensions.
- A grid mapping variable, with metadata conforming to the CF
Conventions. The science variable should refer to this in its
metadata.
"""
with Dataset(variable_output_path, 'w', format='NETCDF4') as output_file:
dimensions = write_dimensions(output_file, target_area, reprojection_cache)
grid_mapping_name = write_grid_mapping(output_file, target_area, dimensions)
write_science_variable(
output_file,
reprojected_data,
variable_name,
dimensions,
grid_mapping_name,
attributes,
)
write_dimension_variables(output_file, dimensions, target_area)
def write_dimensions(
dataset: Dataset, target_area: AreaDefinition, cache: Dict
) -> Tuple[str]:
"""Derive the dimension names using the target area definition and the
information available in the reprojection cache. Then write the
dimensions to the output dataset. Finally, return the dimension names
for later use; e.g. defining the grid mapping name and writing the
dimension variables themselves.
Possible use-cases:
- The Harmony message fully defines a target area. All science
variables will use this, and the dimensions can just be ('y', 'x') or
('lat', 'lon').
- The Harmony message does not fully define a target area, but the
reprojection cache only has one key. The dimensions should be
('y', 'x') or ('lat', 'lon').
- The Harmony message does not fully define a target area, and the
reprojection cache has multiple keys. This means there are multiple
target grids. The dimensions for all grids after the first should
have a suffix added, so that they can all be included in the merged
output.
"""
coordinates_key = tuple(target_area.area_id.split(', '))
grid_mapping_name = target_area.crs.to_cf().get('grid_mapping_name')
# Identify the base name for the dimensions (geographic or not):
if grid_mapping_name == 'latitude_longitude':
x_dim = 'lon'
y_dim = 'lat'
else:
x_dim = 'x'
y_dim = 'y'
# Determine whether a suffix is required (e.g. multiple target grids).
if HARMONY_TARGET not in cache:
# Use any cached dimensions
cached_dimensions = cache[coordinates_key].get('dimensions')
if cached_dimensions is not None:
y_dim, x_dim = cached_dimensions
else:
# Derive suffix
target_grids = len(cache)
if target_grids == 1:
# The first dimensions will be ('y', 'x') or ('lat', 'lon')
dimension_suffix = ''
else:
# Subsequent dimensions will be (y_1, x_1), (y_2, x_2), or
# ('lat_1', 'lon_1'), ('lat_2', 'lon_2') if geographic.
dimension_suffix = f'_{str(target_grids - 1)}'
# Append suffix
x_dim += dimension_suffix
y_dim += dimension_suffix
# Save the dimension information in the cache:
cache[coordinates_key]['dimensions'] = (y_dim, x_dim)
dataset.createDimension(y_dim, size=target_area.shape[0])
dataset.createDimension(x_dim, size=target_area.shape[1])
return (y_dim, x_dim)
def write_grid_mapping(
dataset: Dataset, target_area: AreaDefinition, dimensions: Tuple[str]
) -> str:
"""Use the `pyresample.geometry.AreaDefition` instance associated with
the target area of reprojection to write out a `grid_mapping`
variable to the single band output `netCDF4.Dataset`.
Return the grid_mapping name for use as the `grid_mapping_name` of
the science variable.
In the instance that there are multiple grids in the output file,
which would result from multiple input grids, this function will
use the extended form of `grid_mapping_name`. This format should be:
'<gridMappingVariable>: <coordinatesVariable> [<coordinatesVariable>]'
See section 5.6 of the CF-Conventions for more information:
http://cfconventions.org/cf-conventions/cf-conventions.html
"""
grid_mapping_attributes = target_area.crs.to_cf()
if 'grid_mapping_name' not in grid_mapping_attributes:
grid_mapping_attributes['grid_mapping_name'] = 'crs'
# Check if there are multiple grids and, if so, use the extended format of
# grid mapping name.
if dimensions not in [('lat', 'lon'), ('y', 'x')]:
grid_mapping_attributes[
'grid_mapping_name'
] += f'_{dimensions[0]}_{dimensions[1]}'
grid_mapping = dataset.createVariable(
grid_mapping_attributes['grid_mapping_name'], 'S1'
)
grid_mapping.setncatts(grid_mapping_attributes)
return grid_mapping_attributes.get('grid_mapping_name')
def write_science_variable(
dataset: Dataset,
data_values: np.ndarray,
variable_full_name: str,
dimensions: Tuple[str],
grid_mapping_name: str,
attributes: Dict,
) -> None:
"""Add the science variable to the output `netCDF4.Dataset` instance.
This variable will require:
- The reprojected values to be assigned to the variable array.
- The reprojected dimensions to be associated with the variable.
- The `grid_mapping_name` to be included as an attribute.
- Scaling metadata attributes, if present on the input value.
Note, the `netCDF4` library automatically applied the `add_offset` and
`scale_factor` keywords on reading and writing of `Variable` objects.
"""
variable = dataset.createVariable(
variable_full_name, data_values.dtype, dimensions=dimensions
)
attributes['grid_mapping'] = grid_mapping_name
variable.setncatts(attributes)
variable[:] = data_values[:]
def write_dimension_variables(
dataset: Dataset, dimensions: Tuple[str], target_area: AreaDefinition
) -> None:
"""Write projected x and y coordinate information to the `netCDF4.Dataset`
instance, each as a `netCDF4.Variable`. Each dimension variable
should have:
- A 1-dimension array of the projected values of that dimension.
- A reference to itself as a dimension.
- Metadata that includes the dimension variable's name and units.
"""
x_vector, y_vector = target_area.get_proj_vectors()
dimension_data = {dimensions[0]: y_vector, dimensions[1]: x_vector}
for dimension_name, dimension_vector in dimension_data.items():
variable = dataset.createVariable(
dimension_name, dimension_vector.dtype, dimensions=(dimension_name,)
)
variable[:] = dimension_vector
attributes = next(
attributes
for coordinate, attributes in DIMENSION_METADATA.items()
if dimension_name.startswith(coordinate)
)
variable.setncatts(attributes)