diff --git a/xbout/__init__.py b/xbout/__init__.py
index 8cffd586..914be7ee 100644
--- a/xbout/__init__.py
+++ b/xbout/__init__.py
@@ -1,4 +1,4 @@
-from .load import open_boutdataset, collect
+from .load import open_boutdataset, reload_boutdataset, collect

 from . import geometries
 from .geometries import register_geometry, REGISTERED_GEOMETRIES
diff --git a/xbout/boutdataset.py b/xbout/boutdataset.py
index 847b22a3..17f8438b 100644
--- a/xbout/boutdataset.py
+++ b/xbout/boutdataset.py
@@ -119,32 +119,42 @@ def save(self, savepath='./boutdata.nc', filetype='NETCDF4',
             raise ValueError('Must provide a path to which to save the data.')

         if save_dtype is not None:
-            to_save = to_save.astype(save_dtype)
+            # Workaround to keep attributes while calling astype. See
+            # https://github.com/pydata/xarray/issues/2049
+            # https://github.com/pydata/xarray/pull/2070
+            for da in chain(to_save.values(), to_save.coords.values()):
+                da.data = da.data.astype(save_dtype)
+
+        # Make a shallow copy of the Dataset so we do not modify the attributes of the
+        # data when we change things to save
+        to_save = to_save.copy()

         options = to_save.attrs.pop('options')
         if options:
             # TODO Convert Ben's options class to a (flattened) nested
             # dictionary then store it in ds.attrs?
-            raise NotImplementedError("Haven't decided how to write options "
-                                      "file back out yet")
-        else:
-            # Delete placeholders for options on each variable and coordinate
-            for var in chain(to_save.data_vars, to_save.coords):
-                try:
-                    del to_save[var].attrs['options']
-                except KeyError:
-                    pass
+            warnings.warn(
+                "Haven't decided how to write options file back out yet - deleting "
+                "options for now. To re-load this Dataset, pass the same inputfilepath "
+                "to open_boutdataset when re-loading."
+            )
+        # Delete placeholders for options on each variable and coordinate
+        for var in chain(to_save.data_vars, to_save.coords):
+            try:
+                del to_save[var].attrs['options']
+            except KeyError:
+                pass

         # Store the metadata as individual attributes instead because
         # netCDF can't handle storing arbitrary objects in attrs
-        def dict_to_attrs(obj, key):
-            for key, value in obj.attrs.pop(key).items():
-                obj.attrs[key] = value
+        def dict_to_attrs(obj, section):
+            for key, value in obj.attrs.pop(section).items():
+                obj.attrs[section + ":" + key] = value
         dict_to_attrs(to_save, 'metadata')
         # Must do this for all variables and coordinates in dataset too
         for varname, da in chain(to_save.data_vars.items(), to_save.coords.items()):
             try:
-                dict_to_attrs(da, key='metadata')
+                dict_to_attrs(da, 'metadata')
             except KeyError:
                 pass

diff --git a/xbout/geometries.py b/xbout/geometries.py
index 2aa6189c..75812ea5 100644
--- a/xbout/geometries.py
+++ b/xbout/geometries.py
@@ -131,9 +131,10 @@ def add_toroidal_geometry_coords(ds, *, coordinates=None, grid=None):
     for v in needed_variables:
         if v not in ds:
             if grid is None:
-                raise ValueError("Grid file is required to provide %s. Pass the grid "
-                                 "file name as the 'gridfilepath' argument to "
-                                 "open_boutdataset().")
+                raise ValueError(
+                    f"Grid file is required to provide {v}. Pass the grid file name as "
+                    f"the 'gridfilepath' argument to open_boutdataset()."
+                )
             ds[v] = grid[v]

     # Rename 't' if user requested it
diff --git a/xbout/load.py b/xbout/load.py
index 1f1c09c5..bced49cd 100644
--- a/xbout/load.py
+++ b/xbout/load.py
@@ -2,6 +2,7 @@
 from warnings import warn
 from pathlib import Path
 from functools import partial
+from itertools import chain
 import configparser

 import xarray as xr
@@ -41,7 +42,7 @@


 def open_boutdataset(datapath='./BOUT.dmp.*.nc', inputfilepath=None,
-                     geometry=None, gridfilepath=None, chunks={},
+                     geometry=None, gridfilepath=None, chunks=None,
                      keep_xboundaries=True, keep_yboundaries=False,
                      run_name=None, info=True, pre_squashed=False, **kwargs):
     """
@@ -87,7 +88,7 @@ def open_boutdataset(datapath='./BOUT.dmp.*.nc', inputfilepath=None,
         Name to give to the whole dataset, e.g. 'JET_ELM_high_resolution'.
         Useful if you are going to open multiple simulations and compare the
         results.
-    info : bool, optional
+    info : bool or "terse", optional
     pre_squashed : bool, optional
         Set true when loading from data which was saved as separate variables
         using ds.bout.save().
@@ -100,6 +101,9 @@ def open_boutdataset(datapath='./BOUT.dmp.*.nc', inputfilepath=None,
     ds : xarray.Dataset
     """

+    if chunks is None:
+        chunks = {}
+
     if pre_squashed:
         ds = xr.open_mfdataset(datapath, chunks=chunks, combine='nested',
                                concat_dim=None, **kwargs)
@@ -132,15 +136,7 @@ def open_boutdataset(datapath='./BOUT.dmp.*.nc', inputfilepath=None,
             latest_top_left['t'] = -1
         ds[var] = ds[var].isel(latest_top_left).squeeze(drop=True)

-    if inputfilepath:
-        # Use Ben's options class to store all input file options
-        with open(inputfilepath, 'r') as f:
-            config_string = "[dummysection]\n" + f.read()
-        options = configparser.ConfigParser()
-        options.read_string(config_string)
-    else:
-        options = None
-    ds = _set_attrs_on_all_vars(ds, 'options', options)
+    ds = _add_options(ds, inputfilepath)

     if geometry is None:
         if geometry in ds.attrs:
@@ -176,6 +172,86 @@ def open_boutdataset(datapath='./BOUT.dmp.*.nc', inputfilepath=None,
     return ds


+def reload_boutdataset(
+    datapath, inputfilepath=None, chunks=None, info=True, pre_squashed=False, **kwargs
+):
+    """
+    Reload a BoutDataset saved by bout.save(), restoring it to the state the original
+    Dataset was in when bout.save() was called
+
+    Parameters
+    ----------
+    datapath : str
+        Path to netCDF file where the Dataset to be re-loaded was saved
+    inputfilepath : str, optional
+        'options' are not saved. 'inputfilepath' must be provided if 'options' should
+        be recreated for the reloaded Dataset
+    chunks : dict, optional
+        Passed to `xarray.open_mfdataset` or `xarray.open_dataset`
+    info : bool or "terse", optional
+    pre_squashed : bool, optional
+        Set true when loading from data which was saved as separate variables
+        using ds.bout.save().
+    kwargs : optional
+        Keyword arguments are passed down to `xarray.open_mfdataset` or
+        `xarray.open_dataset`
+    """
+    if pre_squashed:
+        ds = xr.open_mfdataset(datapath, chunks=chunks, combine='nested',
+                               concat_dim=None, **kwargs)
+    else:
+        ds = xr.open_dataset(datapath, chunks=chunks, **kwargs)
+
+    def attrs_to_dict(obj, section):
+        result = {}
+        section = section + ":"
+        sectionlength = len(section)
+        for key in list(obj.attrs):
+            if key[:sectionlength] == section:
+                result[key[sectionlength:]] = obj.attrs.pop(key)
+        return result
+
+    def attrs_remove_section(obj, section):
+        section = section + ":"
+        sectionlength = len(section)
+        has_metadata = False
+        for key in list(obj.attrs):
+            if key[:sectionlength] == section:
+                has_metadata = True
+                del obj.attrs[key]
+        return has_metadata
+
+    # Restore metadata from attrs
+    metadata = attrs_to_dict(ds, "metadata")
+    ds.attrs["metadata"] = metadata
+    # Must do this for all variables and coordinates in dataset too
+    for da in chain(ds.data_vars.values(), ds.coords.values()):
+        if attrs_remove_section(da, "metadata"):
+            da.attrs["metadata"] = metadata
+
+    ds = _add_options(ds, inputfilepath)
+
+    if info == 'terse':
+        print("Read in dataset from {}".format(str(Path(datapath))))
+    elif info:
+        print("Read in:\n{}".format(ds.bout))
+
+    return ds
+
+
+def _add_options(ds, inputfilepath):
+    if inputfilepath:
+        # Use Ben's options class to store all input file options
+        with open(inputfilepath, 'r') as f:
+            config_string = "[dummysection]\n" + f.read()
+        options = configparser.ConfigParser()
+        options.read_string(config_string)
+    else:
+        options = None
+    ds = _set_attrs_on_all_vars(ds, 'options', options)
+    return ds
+
+
 def collect(varname, xind=None, yind=None, zind=None,
             tind=None, path=".", yguards=False, xguards=True,
             info=True, prefix="BOUT.dmp"):
@@ -290,9 +366,12 @@ def _is_dump_files(datapath):
     return True


-def _auto_open_mfboutdataset(datapath, chunks={}, info=True,
+def _auto_open_mfboutdataset(datapath, chunks=None, info=True,
                              keep_xboundaries=False, keep_yboundaries=False,
                              **kwargs):
+    if chunks is None:
+        chunks = {}
+
     filepaths, filetype = _expand_filepaths(datapath)

     # Open just one file to read processor splitting
diff --git a/xbout/tests/test_boutdataset.py b/xbout/tests/test_boutdataset.py
index 5c18918e..df5a8d49 100644
--- a/xbout/tests/test_boutdataset.py
+++ b/xbout/tests/test_boutdataset.py
@@ -6,7 +6,7 @@
 from pathlib import Path

 from xbout.tests.test_load import bout_xyt_example_files, create_bout_ds
-from xbout import BoutDatasetAccessor, open_boutdataset
+from xbout import BoutDatasetAccessor, open_boutdataset, reload_boutdataset
 from xbout.geometries import apply_geometry


@@ -105,17 +105,13 @@ def test_load_options_in_dataset(self, tmpdir_factory, bout_xyt_example_files):
 class TestLoadLogFile:
     pass

-@pytest.mark.skip(reason="Need to sort out issue with saving metadata")
 class TestSave:
-    @pytest.mark.parametrize("options", [False, True])
-    def test_save_all(self, tmpdir_factory, bout_xyt_example_files, options):
+    def test_save_all(self, tmpdir_factory, bout_xyt_example_files):
         # Create data
         path = bout_xyt_example_files(tmpdir_factory, nxpe=4, nype=5, nt=1)

         # Load it as a boutdataset
         original = open_boutdataset(datapath=path, inputfilepath=None)
-        if not options:
-            original.attrs['options'] = {}

         # Save it to a netCDF file
         savepath = str(Path(path).parent) + 'temp_boutdata.nc'
@@ -124,9 +120,52 @@ def test_save_all(self, tmpdir_factory, bout_xyt_example_files):
         # Load it again using bare xarray
         recovered = open_dataset(savepath)

-        # Compare
+        # Compare equal (not identical because attributes are changed when saving)
         xrt.assert_equal(original, recovered)

+    @pytest.mark.parametrize("geometry", [None, "toroidal"])
+    def test_reload_all(self, tmpdir_factory, bout_xyt_example_files, geometry):
+        if geometry is not None:
+            grid = "grid"
+        else:
+            grid = None
+
+        # Create data
+        path = bout_xyt_example_files(tmpdir_factory, nxpe=4, nype=5, nt=1, grid=grid)
+
+        if grid is not None:
+            gridpath = str(Path(path).parent) + "/grid.nc"
+        else:
+            gridpath = None
+
+        # Load it as a boutdataset
+        original = open_boutdataset(
+            datapath=path,
+            inputfilepath=None,
+            geometry=geometry,
+            gridfilepath=gridpath,
+        )
+
+        # Save it to a netCDF file
+        savepath = str(Path(path).parent) + 'temp_boutdata.nc'
+        original.bout.save(savepath=savepath)
+
+        # Load it again
+        recovered = reload_boutdataset(savepath)
+
+        # Compare
+        for coord in original.coords.values():
+            # Get rid of the options if they exist, because options are not dealt with
+            # totally consistently: they exist if a coord was created from a variable
+            # loaded from the BOUT++ output, but not if the coord was calculated from
+            # some parameters or loaded from a grid file
+            try:
+                del coord.attrs["options"]
+            except KeyError:
+                pass
+        xrt.assert_identical(original.load(), recovered.load())
+
+    @pytest.mark.skip("saving and loading as float32 does not work")
     @pytest.mark.parametrize("save_dtype", [np.float64, np.float32])
     def test_save_dtype(self, tmpdir_factory, bout_xyt_example_files, save_dtype):

@@ -160,9 +199,53 @@ def test_save_separate_variables(self, tmpdir_factory, bout_xyt_example_files):
             savepath = str(Path(path).parent) + '/temp_boutdata_' + var + '.nc'
             recovered = open_dataset(savepath)

-            # Compare
+            # Compare equal (not identical because attributes are changed when saving)
             xrt.assert_equal(recovered[var], original[var])

+    @pytest.mark.parametrize("geometry", [None, "toroidal"])
+    def test_reload_separate_variables(
+        self, tmpdir_factory, bout_xyt_example_files, geometry
+    ):
+        if geometry is not None:
+            grid = "grid"
+        else:
+            grid = None
+
+        path = bout_xyt_example_files(tmpdir_factory, nxpe=4, nype=1, nt=1, grid=grid)
+
+        if grid is not None:
+            gridpath = str(Path(path).parent) + "/grid.nc"
+        else:
+            gridpath = None
+
+        # Load it as a boutdataset
+        original = open_boutdataset(
+            datapath=path,
+            inputfilepath=None,
+            geometry=geometry,
+            gridfilepath=gridpath,
+        )
+
+        # Save it to a netCDF file
+        savepath = str(Path(path).parent) + '/temp_boutdata.nc'
+        original.bout.save(savepath=savepath, separate_vars=True)
+
+        # Load it again
+        savepath = str(Path(path).parent) + '/temp_boutdata_*.nc'
+        recovered = reload_boutdataset(savepath, pre_squashed=True)
+
+        # Compare
+        for coord in original.coords.values():
+            # Get rid of the options if they exist, because options are not dealt with
+            # totally consistently: they exist if a coord was created from a variable
+            # loaded from the BOUT++ output, but not if the coord was calculated from
+            # some parameters or loaded from a grid file
+            try:
+                del coord.attrs["options"]
+            except KeyError:
+                pass
+        xrt.assert_identical(recovered, original)
+

 class TestSaveRestart:
     pass
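
Usage sketch of the save/reload round-trip this patch enables (illustration only, not part of the diff; the run directory and file names below are placeholders):

    from xbout import open_boutdataset, reload_boutdataset

    # Open a BOUT++ run, attaching the input file so 'options' are available
    ds = open_boutdataset("run0/BOUT.dmp.*.nc", inputfilepath="run0/BOUT.inp")

    # Save to a single netCDF file, then restore it later; pass the same
    # inputfilepath again because 'options' are not written to the saved file
    ds.bout.save(savepath="run0/boutdata.nc")
    ds2 = reload_boutdataset("run0/boutdata.nc", inputfilepath="run0/BOUT.inp")

    # If the data were saved with separate_vars=True (one file per variable,
    # named after the savepath as in the tests above), reload the whole set
    # with a glob pattern and pre_squashed=True
    ds.bout.save(savepath="run0/boutdata.nc", separate_vars=True)
    ds3 = reload_boutdataset("run0/boutdata_*.nc", pre_squashed=True)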
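For reference, the metadata round trip between save() and reload_boutdataset() works by flattening the 'metadata' dict into prefixed netCDF attributes and rebuilding it on load. A standalone sketch of that scheme follows (the example keys are made up, not taken from the patch):

    # Saving: dict_to_attrs stores each metadata entry as a "metadata:<key>" attribute
    metadata = {"nxpe": 4, "keep_xboundaries": 1}
    attrs = {"metadata:" + key: value for key, value in metadata.items()}
    # -> {"metadata:nxpe": 4, "metadata:keep_xboundaries": 1}

    # Reloading: attrs_to_dict strips the prefix to reassemble the original dict
    prefix = "metadata:"
    restored = {
        key[len(prefix):]: value for key, value in attrs.items()
        if key.startswith(prefix)
    }
    assert restored == metadata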