@@ -600,3 +600,123 @@ def _build_result(self, obj):
600
600
"""Build a product that opens the data using `xarray.open_dataset`."""
601
601
return AWSProduct (obj ,
602
602
lambda s : xr .open_dataset (s .url + '#mode=bytes' , engine = 'netcdf4' ))
603
+
604
+
605
@exporter.export
class MLWPArchive(S3DataStore):
    """Access data from the NOAA/CIRA Machine-Learning Weather Prediction archive in AWS.

    This consists of individual model runs stored in netCDF format, across a collection
    of models (Aurora, FourCastNet, GraphCast, Pangu) and initial conditions
    (GFS or IFS).

    """

    # Friendly (lower-cased) model names mapped to the four-letter codes that start
    # every model id built by `_model_id`.
    _model_map = {'aurora': 'AURO', 'fourcastnet': 'FOUR',
                  'graphcast': 'GRAP', 'pangu': 'PANG'}

    def __init__(self):
        super().__init__('noaa-oar-mlwp-data')

    def _model_id(self, model, version, init):
        """Build a model id from the model name, version, and initial conditions.

        The id has the form ``<CODE>_v<version>_<init>``, e.g. ``GRAP_v100_GFS``. Model
        names not found in ``_model_map`` are used unchanged. A version shorter than three
        characters has ``'00'`` appended (``1`` becomes ``100``). When ``version`` is
        ``None``, the last entry (in sorted order) returned by ``common_prefixes`` for the
        model is used; an `IndexError` results if that listing is empty.
        """
        init = init or 'GFS'
        model = self._model_map.get(model.lower(), model)
        if version is None:
            # NOTE(review): presumably common_prefixes yields strings such as
            # 'GRAP_v100_' (delimiter included), so the sorted maximum is the newest
            # version -- confirm against S3DataStore.
            model_id = sorted(self.common_prefixes(model + '_', '_'))[-1]
        else:
            version = str(version)
            if len(version) < 3:
                version = version + '00'
            model_id = f'{model}_v{version}_'
        return f'{model_id}{init}'

    def _build_key(self, model_id, dt, depth=None):
        """Build a key for the bucket up to the desired point.

        The full key is made of four parts: model id, year, month+day, and file name.
        ``depth`` limits how many leading parts are joined; ``None`` joins all of them.
        """
        # Forecast-hour range and step that are hard-coded into every file name.
        first_hour = 0
        last_hour = 240
        step_hours = 6
        filename = (f'{model_id}_{dt:%Y%m%d%H}_'
                    f'f{first_hour:03d}_f{last_hour:03d}_{step_hours:02d}.nc')
        parts = [model_id, f'{dt:%Y}', f'{dt:%m%d}', filename]
        return self.delimiter.join(parts[:depth])

    def dt_from_key(self, key):  # noqa: D102
        # Docstring inherited
        # File names look like GRAP_v100_GFS_2025021212_f000_f240_06.nc, so the
        # run time is the fourth underscore-separated field of the last path component.
        stamp = key.split('/')[-1].split('_')[3]
        return datetime.strptime(stamp, '%Y%m%d%H').replace(tzinfo=timezone.utc)

    def get_product(self, model, dt=None, version=None, init=None):
        """Get a product from the archive.

        Parameters
        ----------
        model : str
            The selected model to get data for. Can be any of the four-letter codes supported
            by the archive (currently FOUR, PANG, GRAP, AURO), or the known names (
            case-insensitive): ``'Aurora'``, ``'FourCastNet'``, ``'graphcast'``, or
            ``'pangu'``.
        dt : `datetime.datetime`, optional
            The desired date/time for the model run; the one closest matching in time will
            be returned. This should have the proper timezone included; if not specified, UTC
            will be assumed. If ``None``, defaults to the current UTC date/time.
        version : str or int, optional
            The particular version of the model to select. If not given, the query will try
            to select the most recent version of the model.
        init : str, optional
            Selects the model run initialized with a particular set of initial conditions.
            Should be one of ``'GFS'`` or ``'IFS'``, defaults to ``'GFS'``.

        See Also
        --------
        get_range

        """
        dt = datetime.now(timezone.utc) if dt is None else ensure_timezone(dt)
        model_id = self._model_id(model, version, init)

        # Key timestamps are interpreted as UTC (see dt_from_key), so the day folder must
        # be derived from the UTC representation of the request, not its local date.
        search_key = self._build_key(model_id, dt.astimezone(timezone.utc))

        # Strip the trailing '_<run time>_f000_f240_06.nc' so that every run stored in
        # that day's folder for this model id is a candidate.
        prefix = search_key.rsplit('_', maxsplit=4)[0]
        return self._closest_result(self.objects(prefix), dt)

    def get_range(self, model, start, end, version=None, init=None):
        """Yield products within a particular date/time range.

        Products whose run time ``t`` satisfies ``start <= t < end`` are yielded.

        Parameters
        ----------
        model : str
            The selected model to get data for. Can be any of the four-letter codes supported
            by the archive (currently FOUR, PANG, GRAP, AURO), or the known names (
            case-insensitive): ``'Aurora'``, ``'FourCastNet'``, ``'graphcast'``, or
            ``'pangu'``.
        start : `datetime.datetime`
            The start of the date/time range. This should have the proper timezone included;
            if not specified, UTC will be assumed.
        end : `datetime.datetime`
            The end of the date/time range. This should have the proper timezone included;
            if not specified, UTC will be assumed.
        version : str or int, optional
            The particular version of the model to select. If not given, the query will try
            to select the most recent version of the model.
        init : str, optional
            Selects the model run initialized with a particular set of initial conditions.
            Should be one of ``'GFS'`` or ``'IFS'``, defaults to ``'GFS'``.

        See Also
        --------
        get_product

        """
        start = ensure_timezone(start)
        end = ensure_timezone(end)
        model_id = self._model_id(model, version, init)

        # Objects are grouped in per-day folders keyed on the UTC date, so iterate whole
        # UTC days starting at midnight of the first day. Stepping from the raw ``start``
        # would skip the final day's folder whenever ``end`` has an earlier time of day
        # than ``start`` (e.g. 18Z one day to 06Z the next).
        # NOTE(review): assumes date_iterator steps from its first argument in one-day
        # increments while the value is before ``end`` -- confirm against its definition.
        first_day = start.astimezone(timezone.utc).replace(hour=0, minute=0, second=0,
                                                           microsecond=0)
        for day in date_iterator(first_day, end, days=1):
            prefix = self._build_key(model_id, day, depth=3)
            for obj in self.objects(prefix):
                # The exact window is enforced here, so listing a whole day never yields
                # a run outside [start, end).
                if start <= self.dt_from_key(obj.key) < end:
                    yield self._build_result(obj)

    def _build_result(self, obj):
        """Build a product that opens the data using `xarray.open_dataset`."""
        # '#mode=bytes' is appended to the object's URL before it is handed to the
        # netcdf4 engine.
        return AWSProduct(obj,
                          lambda s: xr.open_dataset(s.url + '#mode=bytes', engine='netcdf4'))
0 commit comments