Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion xbout/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from .load import open_boutdataset, collect
from .load import open_boutdataset, reload_boutdataset, collect

from . import geometries
from .geometries import register_geometry, REGISTERED_GEOMETRIES
Expand Down
38 changes: 24 additions & 14 deletions xbout/boutdataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -119,32 +119,42 @@ def save(self, savepath='./boutdata.nc', filetype='NETCDF4',
raise ValueError('Must provide a path to which to save the data.')

if save_dtype is not None:
to_save = to_save.astype(save_dtype)
# Workaround to keep attributes while calling astype. See
# https://github.com/pydata/xarray/issues/2049
# https://github.com/pydata/xarray/pull/2070
for da in chain(to_save.values(), to_save.coords.values()):
da.data = da.data.astype(save_dtype)

# make shallow copy of Dataset, so we do not modify the attributes of the data
# when we change things to save
to_save = to_save.copy()

options = to_save.attrs.pop('options')
if options:
# TODO Convert Ben's options class to a (flattened) nested
# dictionary then store it in ds.attrs?
raise NotImplementedError("Haven't decided how to write options "
"file back out yet")
else:
# Delete placeholders for options on each variable and coordinate
for var in chain(to_save.data_vars, to_save.coords):
try:
del to_save[var].attrs['options']
except KeyError:
pass
warnings.warn(
"Haven't decided how to write options file back out yet - deleting "
"options for now. To re-load this Dataset, pass the same inputfilepath "
"to open_boutdataset when re-loading."
)
# Delete placeholders for options on each variable and coordinate
for var in chain(to_save.data_vars, to_save.coords):
try:
del to_save[var].attrs['options']
except KeyError:
pass

# Store the metadata as individual attributes instead because
# netCDF can't handle storing arbitrary objects in attrs
def dict_to_attrs(obj, key):
for key, value in obj.attrs.pop(key).items():
obj.attrs[key] = value
def dict_to_attrs(obj, section):
for key, value in obj.attrs.pop(section).items():
obj.attrs[section + ":" + key] = value
dict_to_attrs(to_save, 'metadata')
# Must do this for all variables and coordinates in dataset too
for varname, da in chain(to_save.data_vars.items(), to_save.coords.items()):
try:
dict_to_attrs(da, key='metadata')
dict_to_attrs(da, 'metadata')
except KeyError:
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What would happen if there isn't any metadata?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If there's no metadata on a variable then there will be a KeyError, and nothing is done for that variable.

pass

Expand Down
7 changes: 4 additions & 3 deletions xbout/geometries.py
Original file line number Diff line number Diff line change
Expand Up @@ -131,9 +131,10 @@ def add_toroidal_geometry_coords(ds, *, coordinates=None, grid=None):
for v in needed_variables:
if v not in ds:
if grid is None:
raise ValueError("Grid file is required to provide %s. Pass the grid "
"file name as the 'gridfilepath' argument to "
"open_boutdataset().")
raise ValueError(
f"Grid file is required to provide {v}. Pass the grid file name as "
f"the 'gridfilepath' argument to open_boutdataset()."
)
ds[v] = grid[v]

# Rename 't' if user requested it
Expand Down
103 changes: 91 additions & 12 deletions xbout/load.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
from warnings import warn
from pathlib import Path
from functools import partial
from itertools import chain
import configparser

import xarray as xr
Expand Down Expand Up @@ -41,7 +42,7 @@


def open_boutdataset(datapath='./BOUT.dmp.*.nc', inputfilepath=None,
geometry=None, gridfilepath=None, chunks={},
geometry=None, gridfilepath=None, chunks=None,
keep_xboundaries=True, keep_yboundaries=False,
run_name=None, info=True, pre_squashed=False, **kwargs):
"""
Expand Down Expand Up @@ -87,7 +88,7 @@ def open_boutdataset(datapath='./BOUT.dmp.*.nc', inputfilepath=None,
Name to give to the whole dataset, e.g. 'JET_ELM_high_resolution'.
Useful if you are going to open multiple simulations and compare the
results.
info : bool, optional
info : bool or "terse", optional
pre_squashed : bool, optional
Set true when loading from data which was saved as separate variables
using ds.bout.save().
Expand All @@ -100,6 +101,9 @@ def open_boutdataset(datapath='./BOUT.dmp.*.nc', inputfilepath=None,
ds : xarray.Dataset
"""

if chunks is None:
chunks = {}

if pre_squashed:
ds = xr.open_mfdataset(datapath, chunks=chunks, combine='nested',
concat_dim=None, **kwargs)
Expand Down Expand Up @@ -132,15 +136,7 @@ def open_boutdataset(datapath='./BOUT.dmp.*.nc', inputfilepath=None,
latest_top_left['t'] = -1
ds[var] = ds[var].isel(latest_top_left).squeeze(drop=True)

if inputfilepath:
# Use Ben's options class to store all input file options
with open(inputfilepath, 'r') as f:
config_string = "[dummysection]\n" + f.read()
options = configparser.ConfigParser()
options.read_string(config_string)
else:
options = None
ds = _set_attrs_on_all_vars(ds, 'options', options)
ds = _add_options(ds, inputfilepath)

if geometry is None:
if geometry in ds.attrs:
Expand Down Expand Up @@ -176,6 +172,86 @@ def open_boutdataset(datapath='./BOUT.dmp.*.nc', inputfilepath=None,
return ds


def reload_boutdataset(
    datapath, inputfilepath=None, chunks=None, info=True, pre_squashed=False, **kwargs
):
    """
    Reload a BoutDataset saved by bout.save(), restoring it to the state the original
    Dataset was in when bout.save() was called

    Parameters
    ----------
    datapath : str
        Path to netCDF file where the Dataset to be re-loaded was saved
    inputfilepath : str, optional
        'options' are not saved. 'inputfilepath' must be provided if 'options' should
        be recreated for the reloaded Dataset
    chunks : dict, optional
        Passed to `xarray.open_mfdataset` or `xarray.open_dataset`
    info : bool or "terse", optional
    pre_squashed : bool, optional
        Set true when loading from data which was saved as separate variables
        using ds.bout.save().
    kwargs : optional
        Keyword arguments are passed down to `xarray.open_mfdataset` or
        `xarray.open_dataset`

    Returns
    -------
    ds : xarray.Dataset
    """
    # Normalise the default here rather than in the signature (avoids a mutable
    # default argument), and for consistency with open_boutdataset and
    # _auto_open_mfboutdataset, which also treat chunks=None as chunks={}
    if chunks is None:
        chunks = {}

    if pre_squashed:
        # Variables were saved to separate files by save(separate_vars=True);
        # merge them without concatenating along any dimension
        ds = xr.open_mfdataset(datapath, chunks=chunks, combine='nested',
                               concat_dim=None, **kwargs)
    else:
        ds = xr.open_dataset(datapath, chunks=chunks, **kwargs)

    def attrs_to_dict(obj, section):
        # Gather attrs that were flattened by save() as "<section>:<key>" back
        # into a single dict, removing the flattened entries from obj.attrs
        result = {}
        section = section + ":"
        sectionlength = len(section)
        for key in list(obj.attrs):
            if key[:sectionlength] == section:
                result[key[sectionlength:]] = obj.attrs.pop(key)
        return result

    def attrs_remove_section(obj, section):
        # Delete all attrs flattened as "<section>:<key>"; return True if any
        # such entries were present
        section = section + ":"
        sectionlength = len(section)
        has_metadata = False
        for key in list(obj.attrs):
            if key[:sectionlength] == section:
                has_metadata = True
                del obj.attrs[key]
        return has_metadata

    # Restore metadata from attrs
    metadata = attrs_to_dict(ds, "metadata")
    ds.attrs["metadata"] = metadata
    # Must do this for all variables and coordinates in dataset too.
    # NOTE(review): each variable gets the shared dataset-level metadata dict,
    # so any per-variable metadata differences saved by save() are discarded here
    for da in chain(ds.data_vars.values(), ds.coords.values()):
        if attrs_remove_section(da, "metadata"):
            da.attrs["metadata"] = metadata

    # Re-parse the input file if one was given; options are not stored in the
    # saved netCDF file
    ds = _add_options(ds, inputfilepath)

    if info == 'terse':
        print("Read in dataset from {}".format(str(Path(datapath))))
    elif info:
        print("Read in:\n{}".format(ds.bout))

    return ds


def _add_options(ds, inputfilepath):
    """Attach BOUT++ input-file options to the Dataset and all its variables.

    If *inputfilepath* is falsy, a placeholder of None is attached instead so
    that every variable still carries an 'options' attribute.
    """
    options = None
    if inputfilepath:
        # Use Ben's options class to store all input file options
        # BOUT.inp has no top-level section header, so prepend a dummy one to
        # make the file parseable by configparser
        with open(inputfilepath, 'r') as f:
            config_string = "[dummysection]\n" + f.read()
        options = configparser.ConfigParser()
        options.read_string(config_string)
    return _set_attrs_on_all_vars(ds, 'options', options)


def collect(varname, xind=None, yind=None, zind=None, tind=None,
path=".", yguards=False, xguards=True, info=True, prefix="BOUT.dmp"):

Expand Down Expand Up @@ -290,9 +366,12 @@ def _is_dump_files(datapath):
return True


def _auto_open_mfboutdataset(datapath, chunks={}, info=True,
def _auto_open_mfboutdataset(datapath, chunks=None, info=True,
keep_xboundaries=False, keep_yboundaries=False,
**kwargs):
if chunks is None:
chunks = {}

filepaths, filetype = _expand_filepaths(datapath)

# Open just one file to read processor splitting
Expand Down
99 changes: 91 additions & 8 deletions xbout/tests/test_boutdataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
from pathlib import Path

from xbout.tests.test_load import bout_xyt_example_files, create_bout_ds
from xbout import BoutDatasetAccessor, open_boutdataset
from xbout import BoutDatasetAccessor, open_boutdataset, reload_boutdataset
from xbout.geometries import apply_geometry


Expand Down Expand Up @@ -105,17 +105,13 @@ def test_load_options_in_dataset(self, tmpdir_factory, bout_xyt_example_files):
class TestLoadLogFile:
    # Placeholder: tests for loading BOUT++ log files have not been written yet
    pass

@pytest.mark.skip(reason="Need to sort out issue with saving metadata")
class TestSave:
@pytest.mark.parametrize("options", [False, True])
def test_save_all(self, tmpdir_factory, bout_xyt_example_files, options):
def test_save_all(self, tmpdir_factory, bout_xyt_example_files):
# Create data
path = bout_xyt_example_files(tmpdir_factory, nxpe=4, nype=5, nt=1)

# Load it as a boutdataset
original = open_boutdataset(datapath=path, inputfilepath=None)
if not options:
original.attrs['options'] = {}

# Save it to a netCDF file
savepath = str(Path(path).parent) + 'temp_boutdata.nc'
Expand All @@ -124,9 +120,52 @@ def test_save_all(self, tmpdir_factory, bout_xyt_example_files, options):
# Load it again using bare xarray
recovered = open_dataset(savepath)

# Compare
# Compare equal (not identical because attributes are changed when saving)
xrt.assert_equal(original, recovered)

@pytest.mark.parametrize("geometry", [None, "toroidal"])
def test_reload_all(self, tmpdir_factory, bout_xyt_example_files, geometry):
if geometry is not None:
grid = "grid"
else:
grid = None

# Create data
path = bout_xyt_example_files(tmpdir_factory, nxpe=4, nype=5, nt=1, grid=grid)

if grid is not None:
gridpath = str(Path(path).parent) + "/grid.nc"
else:
gridpath = None

# Load it as a boutdataset
original = open_boutdataset(
datapath=path,
inputfilepath=None,
geometry=geometry,
gridfilepath=gridpath,
)

# Save it to a netCDF file
savepath = str(Path(path).parent) + 'temp_boutdata.nc'
original.bout.save(savepath=savepath)

# Load it again
recovered = reload_boutdataset(savepath)

# Compare
for coord in original.coords.values():
# Get rid of the options if they exist, because options are not dealt with
# totally consistently: they exist if a coord was created from a variable
# loaded from the BOUT++ output, but not if the coord was calculated from
# some parameters or loaded from a grid file
try:
del coord.attrs["options"]
except KeyError:
pass
xrt.assert_identical(original.load(), recovered.load())

@pytest.mark.skip("saving and loading as float32 does not work")
@pytest.mark.parametrize("save_dtype", [np.float64, np.float32])
def test_save_dtype(self, tmpdir_factory, bout_xyt_example_files, save_dtype):

Expand Down Expand Up @@ -160,9 +199,53 @@ def test_save_separate_variables(self, tmpdir_factory, bout_xyt_example_files):
savepath = str(Path(path).parent) + '/temp_boutdata_' + var + '.nc'
recovered = open_dataset(savepath)

# Compare
# Compare equal (not identical because attributes are changed when saving)
xrt.assert_equal(recovered[var], original[var])

@pytest.mark.parametrize("geometry", [None, "toroidal"])
def test_reload_separate_variables(
self, tmpdir_factory, bout_xyt_example_files, geometry
):
if geometry is not None:
grid = "grid"
else:
grid = None

path = bout_xyt_example_files(tmpdir_factory, nxpe=4, nype=1, nt=1, grid=grid)

if grid is not None:
gridpath = str(Path(path).parent) + "/grid.nc"
else:
gridpath = None

# Load it as a boutdataset
original = open_boutdataset(
datapath=path,
inputfilepath=None,
geometry=geometry,
gridfilepath=gridpath,
)

# Save it to a netCDF file
savepath = str(Path(path).parent) + '/temp_boutdata.nc'
original.bout.save(savepath=savepath, separate_vars=True)

# Load it again
savepath = str(Path(path).parent) + '/temp_boutdata_*.nc'
recovered = reload_boutdataset(savepath, pre_squashed=True)

# Compare
for coord in original.coords.values():
# Get rid of the options if they exist, because options are not dealt with
# totally consistently: they exist if a coord was created from a variable
# loaded from the BOUT++ output, but not if the coord was calculated from
# some parameters or loaded from a grid file
try:
del coord.attrs["options"]
except KeyError:
pass
xrt.assert_identical(recovered, original)


class TestSaveRestart:
    # Placeholder: tests for saving BOUT++ restart files have not been written yet
    pass