Skip to content

Commit fbab112

Browse files
dopplershift authored and dcamron committed
ENH: Add client for NOAA/CIRA MLWP archive
1 parent 627f6b6 commit fbab112

File tree

6 files changed

+425
-1
lines changed

6 files changed

+425
-1
lines changed

.codespellexclude

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,3 +16,4 @@
1616
components that are earth-relative. The primary exception is NAM output with wind
1717
col_head.SELV,
1818
row_head.SELV,
19+
by the archive (currently FOUR, PANG, GRAP, AURO), or the known names (

examples/remote/ml_forecast.py

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,43 @@
1+
# Copyright (c) 2025 MetPy Developers.
# Distributed under the terms of the BSD 3-Clause License.
# SPDX-License-Identifier: BSD-3-Clause
"""
=========================================
ML Weather Prediction Access and Plotting
=========================================

Use MetPy to access machine learning weather prediction (MLWP) data in AWS S3 and plot using
the simplified plotting interface.
"""
from datetime import datetime

from metpy.plots import MapPanel, PanelContainer, RasterPlot
from metpy.remote import MLWPArchive

###################
# Ask the archive for the GraphCast run closest to the requested date/time, then open it.
run_time = datetime(2025, 2, 15, 18)
archive = MLWPArchive()
product = archive.get_product('graphcast', run_time)
forecast = product.access()

###################
# Describe the plot with MetPy's simplified plotting interface: a raster of the ``t2``
# field at the requested time, drawn on a map panel inside a single-panel figure.
temp_plot = RasterPlot()
temp_plot.data = forecast
temp_plot.field = 't2'
temp_plot.time = run_time
temp_plot.colorbar = 'horizontal'
temp_plot.colormap = 'RdBu_r'

# The title combines the field's ``long_name`` attribute with the requested time.
field_label = forecast[temp_plot.field].attrs['long_name']

map_panel = MapPanel()
map_panel.area = 'co'
map_panel.projection = 'lcc'
map_panel.layers = ['coastline', 'borders', 'states']
map_panel.plots = [temp_plot]
map_panel.title = f"{field_label} @ {run_time}"

figure = PanelContainer()
figure.size = (8, 8)
figure.panels = [map_panel]
figure.draw()

figure.show()

src/metpy/remote/aws.py

Lines changed: 120 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -600,3 +600,123 @@ def _build_result(self, obj):
600600
"""Build a product that opens the data using `xarray.open_dataset`."""
601601
return AWSProduct(obj,
602602
lambda s: xr.open_dataset(s.url + '#mode=bytes', engine='netcdf4'))
603+
604+
605+
@exporter.export
class MLWPArchive(S3DataStore):
    """Access data from the NOAA/CIRA Machine-Learning Weather Prediction archive in AWS.

    This consists of individual model runs stored in netCDF format, across a collection of
    models (Aurora, FourCastNet, GraphCast, Pangu) and initial conditions (GFS or IFS).

    """

    # Maps lower-cased, human-friendly model names to the four-letter codes used in keys.
    _model_map = {'aurora': 'AURO', 'fourcastnet': 'FOUR',
                  'graphcast': 'GRAP', 'pangu': 'PANG'}

    def __init__(self):
        super().__init__('noaa-oar-mlwp-data')

    def _model_id(self, model, version, init):
        """Build a model id from the model name, version, and initial conditions.

        Parameters
        ----------
        model : str
            Either a known model name (case-insensitive, see ``_model_map``) or a model
            code, which is passed through unchanged.
        version : str or int or None
            The model version. Values shorter than three characters are right-padded with
            zeros to three characters (e.g. ``1`` and ``10`` both give ``v100``). If
            ``None``, the last of the sorted prefixes found for the model is used.
        init : str or None
            Name of the initial conditions; ``'GFS'`` is used when this is falsy.

        Returns
        -------
        str
            The model id, e.g. ``'GRAP_v100_GFS'``.

        """
        init = init or 'GFS'
        model = self._model_map.get(model.lower(), model)
        if version is None:
            # No version requested: pick the lexicographically last '<model>_v..._' prefix
            model_id = sorted(self.common_prefixes(model + '_', '_'))[-1]
        else:
            # Pad (rather than blindly append '00') so that a two-character version does
            # not turn into a four-digit one.
            version = str(version).ljust(3, '0')
            model_id = f'{model}_v{version}_'
        return f'{model_id}{init}'

    def _build_key(self, model_id, dt, depth=None):
        """Build a key for the bucket up to the desired point.

        The full key has four parts: model id, year, month+day, and the file name
        (e.g. ``GRAP_v100_GFS_2025021212_f000_f240_06.nc``). ``depth`` limits the result
        to that many leading parts; ``None`` keeps all of them.

        """
        first_hour = 0
        last_hour = 240
        step_hours = 6
        parts = [model_id, f'{dt:%Y}', f'{dt:%m%d}',
                 f'{model_id}_{dt:%Y%m%d%H}_'
                 f'f{first_hour:03d}_f{last_hour:03d}_{step_hours:02d}.nc']
        return self.delimiter.join(parts[:depth])

    def dt_from_key(self, key):  # noqa: D102
        # Docstring inherited
        # GRAP_v100_GFS_2025021212_f000_f240_06.nc
        # The fourth underscore-separated field of the file name is the run time
        dt = key.split('/')[-1].split('_')[3]
        return datetime.strptime(dt, '%Y%m%d%H').replace(tzinfo=timezone.utc)

    def get_product(self, model, dt=None, version=None, init=None):
        """Get a product from the archive.

        Parameters
        ----------
        model : str
            The selected model to get data for. Can be any of the four-letter codes supported
            by the archive (currently FOUR, PANG, GRAP, AURO), or the known names (
            case-insensitive): ``'Aurora'``, ``'FourCastNet'``, ``'graphcast'``, or
            ``'pangu'``.
        dt : `datetime.datetime`, optional
            The desired date/time for the model run; the one closest matching in time will
            be returned. This should have the proper timezone included; if not specified, UTC
            will be assumed. If ``None``, defaults to the current UTC date/time.
        version : str or int, optional
            The particular version of the model to select. Values shorter than three
            characters are right-padded with zeros (e.g. ``1`` selects ``v100``). If not
            given, the query will try to select the most recent version of the model.
        init : str, optional
            Selects the model run initialized with a particular set of initial conditions.
            Should be one of ``'GFS'`` or ``'IFS'``, defaults to ``'GFS'``.

        See Also
        --------
        get_range

        """
        dt = datetime.now(timezone.utc) if dt is None else ensure_timezone(dt)
        model_id = self._model_id(model, version, init)
        search_key = self._build_key(model_id, dt)
        # Drop the trailing '_<YYYYMMDDHH>_f000_f240_06.nc' so the prefix matches every
        # run stored under the requested day, not only the exact hour asked for.
        prefix = search_key.rsplit('_', maxsplit=4)[0]
        return self._closest_result(self.objects(prefix), dt)

    def get_range(self, model, start, end, version=None, init=None):
        """Yield products within a particular date/time range.

        Parameters
        ----------
        model : str
            The selected model to get data for. Can be any of the four-letter codes supported
            by the archive (currently FOUR, PANG, GRAP, AURO), or the known names (
            case-insensitive): ``'Aurora'``, ``'FourCastNet'``, ``'graphcast'``, or
            ``'pangu'``.
        start : `datetime.datetime`
            The start of the date/time range. This should have the proper timezone included;
            if not specified, UTC will be assumed.
        end : `datetime.datetime`
            The end of the date/time range. This should have the proper timezone included;
            if not specified, UTC will be assumed.
        version : str or int, optional
            The particular version of the model to select. Values shorter than three
            characters are right-padded with zeros (e.g. ``1`` selects ``v100``). If not
            given, the query will try to select the most recent version of the model.
        init : str, optional
            Selects the model run initialized with a particular set of initial conditions.
            Should be one of ``'GFS'`` or ``'IFS'``, defaults to ``'GFS'``.

        See Also
        --------
        get_product

        """
        start = ensure_timezone(start)
        end = ensure_timezone(end)
        model_id = self._model_id(model, version, init)
        for dt in date_iterator(start, end, days=1):
            # List one day at a time: '<model_id>/<YYYY>/<MMDD>'
            prefix = self._build_key(model_id, dt, depth=3)
            for obj in self.objects(prefix):
                # Half-open interval: ``start`` is included, ``end`` is not
                if start <= self.dt_from_key(obj.key) < end:
                    yield self._build_result(obj)

    def _build_result(self, obj):
        """Build a product that opens the data using `xarray.open_dataset`."""
        return AWSProduct(obj,
                          lambda s: xr.open_dataset(s.url + '#mode=bytes', engine='netcdf4'))

tests/remote/fixtures/test_mlwp_range.yaml

Lines changed: 158 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)