Skip to content

Commit 6b8496b

Browse files
committed
Added xarraybackend.py to xBOUT, as it is BOUT++-specific code rather than a generic ADIOS solution.
Open .bp files with "bout_adios2" engine in xarray
1 parent f416a66 commit 6b8496b

File tree

3 files changed

+310
-0
lines changed

3 files changed

+310
-0
lines changed

pyproject.toml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -76,6 +76,9 @@ Source = "https://github.com/boutproject/xBOUT"
7676
Tracker = "https://github.com/boutproject/xBOUT/issues"
7777
Documentation = "https://xbout.readthedocs.io/en/latest/"
7878

79+
[project.entry-points."xarray.backends"]
80+
bout_adios2 = "xbout.xarraybackend:BoutAdiosBackendEntrypoint"
81+
7982
[tool.setuptools_scm]
8083
write_to = "xbout/_version.py"
8184

xbout/utils.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,9 @@ def _check_filetype(path):
3434
filetype = "netcdf4"
3535
elif path.suffix == ".h5netcdf":
3636
filetype = "h5netcdf"
37+
elif path.suffix == ".bp":
38+
filetype = "bout_adios2"
39+
3740
else:
3841
raise IOError("Do not know how to read file extension {}".format(path.suffix))
3942
return filetype

xbout/xarraybackend.py

Lines changed: 304 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,304 @@
1+
"""License:
2+
Distributed under the OSI-approved Apache License, Version 2.0. See
3+
accompanying file Copyright.txt for details.
4+
"""
5+
6+
from __future__ import annotations
7+
8+
import os
9+
10+
# import warnings
11+
12+
from collections.abc import Iterable
13+
from typing import TYPE_CHECKING, Any, ItemsView
14+
15+
import numpy as np
16+
from adios2 import FileReader
17+
18+
# from xarray.core.dataset import Dataset
19+
from xarray import Dataset, Variable
20+
from xarray import coding, conventions
21+
from xarray.backends.common import (
22+
BACKEND_ENTRYPOINTS,
23+
AbstractWritableDataStore,
24+
BackendArray,
25+
BackendEntrypoint,
26+
_encode_variable_name,
27+
_normalize_path,
28+
)
29+
30+
# from xarray.backends.store import StoreBackendEntrypoint
31+
from xarray.core import indexing
32+
33+
# from xarray.core.indexing import LazilyIndexedArray
34+
35+
# from xarray.core.parallelcompat import guess_chunkmanager
36+
# from xarray.core.pycompat import integer_types
37+
# from xarray.core.utils import (
38+
# FrozenDict,
39+
# HiddenKeyDict,
40+
# close_on_error,
41+
# )
42+
43+
if TYPE_CHECKING:
44+
from io import BufferedIOBase
45+
from xarray.backends.common import AbstractDataStore
46+
47+
# need some special secret attributes to tell us the dimensions
DIMENSION_KEY = "time_dimension"

# Maps the type name ADIOS2 reports for a variable (the "Type" entry of
# available_variables()) to something np.dtype() accepts.
#
# Every value must be convertible with np.dtype(): the backend calls
# np.dtype(adios_to_numpy_type[...]) for each array variable.
adios_to_numpy_type = {
    # np.char is a *module*, not a scalar type; np.dtype(np.char) raises
    # TypeError. A C char is one byte, so expose it as int8.
    "char": np.int8,
    "int8_t": np.int8,
    "int16_t": np.int16,
    "int32_t": np.int32,
    "int64_t": np.int64,
    "uint8_t": np.uint8,
    "uint16_t": np.uint16,
    "uint32_t": np.uint32,
    "uint64_t": np.uint64,
    # ADIOS "float" is single precision; Python's builtin float would be
    # interpreted by numpy as float64.
    "float": np.float32,
    "double": np.double,
    "long double": np.longdouble,
    "float complex": np.complex64,
    "double complex": np.complex128,
    # Unicode string dtype (again, np.char is not usable as a dtype).
    "string": np.str_,
}
67+
68+
69+
class BoutADIOSBackendArray(BackendArray):
    """ADIOS2 backend for lazily indexed arrays.

    Wraps one ADIOS2 variable of an open ``FileReader`` so that xarray can
    read sub-selections on demand.

    Parameters
    ----------
    shape : list
        Shape xarray should see. When the variable has more than one step,
        the caller is expected to put the number of steps first, followed by
        the ADIOS shape of the variable.
    dtype : np.dtype
        Element type reported to xarray.
    lock
        Stored but currently unused (see ``_raw_indexing_method``).
    adiosfile : FileReader
        Open ADIOS2 file the variable is read from.
    varname : str
        Name of the variable inside ``adiosfile``.
    """

    def __init__(self, shape: list, dtype: np.dtype, lock, adiosfile: FileReader, varname: str):
        # Stored as a tuple so the shape cannot be mutated by the caller
        # after construction.
        self.shape = tuple(shape)
        self.dtype = dtype
        self.lock = lock
        self.fh = adiosfile
        self.varname = varname
        self.adiosvar = self.fh.inquire_variable(varname)
        self.steps = self.adiosvar.steps()

    def __getitem__(self, key: indexing.ExplicitIndexer) -> np.typing.ArrayLike:
        """Let xarray reduce ``key`` to basic indexing, then read from ADIOS."""
        return indexing.explicit_indexing_adapter(
            key,
            self.shape,
            indexing.IndexingSupport.BASIC,
            self._raw_indexing_method,
        )

    def _raw_indexing_method(self, key: tuple) -> np.typing.ArrayLike:
        """Read the selection described by ``key`` (ints and slices only).

        ``key`` has one entry per axis of ``self.shape``. When the variable
        has more than one step, ``key[0]`` selects steps and the remaining
        entries select within the ADIOS shape of the variable.

        Returns
        -------
        numpy array holding the selection. As with numpy basic indexing,
        axes selected with an integer are dropped from the result.

        Raises
        ------
        IndexError
            For a slice step other than 1 or an out-of-range integer index.
        ValueError
            If the amount of data returned by ADIOS does not match the
            requested selection.
        """
        # NOTE(review): the original code carried a comment that ADIOS is not
        # thread safe even for reading, with a disabled `with self.lock:`;
        # no locking is performed here either.
        has_step_axis = self.steps > 1

        start = []
        count = []
        step_selection = None
        # Applied to the data after reading: slice(None) keeps an axis,
        # 0 drops an axis that was selected with an integer.
        post_index = []

        for axis, (sel, extent) in enumerate(zip(key, self.shape)):
            if isinstance(sel, slice):
                # slice.indices resolves None and negative bounds and clamps
                # to the extent of *this* axis.
                first, last, stride = sel.indices(extent)
                if stride != 1:
                    msg = (
                        "The indexing operation with step != 1 you are attempting to perform "
                        "is not valid on ADIOS2.Variable object. "
                    )
                    raise IndexError(msg)
                length = max(last - first, 0)
                post_index.append(slice(None))
            else:
                # Integer index: zero-based, same as the ADIOS start offset.
                first = int(sel)
                if first < 0:
                    first += extent
                if not 0 <= first < extent:
                    raise IndexError(
                        f"index {sel} is out of bounds for axis {axis} "
                        f"with size {extent} (variable {self.varname})"
                    )
                length = 1
                post_index.append(0)

            if has_step_axis and axis == 0:  # key[0] is the step selection
                step_selection = (first, length)
            else:
                start.append(first)
                count.append(length)

        out_shape = list(count)
        if step_selection is not None:
            out_shape.insert(0, step_selection[1])

        if 0 in out_shape:
            # Nothing to read; avoid asking ADIOS for an empty selection.
            return np.empty(out_shape, dtype=self.dtype)[tuple(post_index)]

        if step_selection is not None:
            self.adiosvar.set_step_selection(list(step_selection))
        self.adiosvar.set_selection([start, count])

        data = self.fh.read(self.adiosvar)
        if step_selection is not None:
            # ADIOS does not have a time dimension: the read returns the
            # selected steps folded into the first dimension. Unfold them
            # using the number of steps actually *selected*.
            expected = int(np.prod(out_shape, dtype=np.int64))
            if data.size != expected:
                raise ValueError(
                    f"ERROR in BoutADIOSBackendArray: first dimension problem "
                    f"with handling steps. Variable name={self.varname} "
                    f"shape={data.shape}, steps={step_selection[1]}"
                )
            data = data.reshape(tuple(out_shape))

        return data[tuple(post_index)]
150+
151+
152+
def attrs_of_var(varname: str, items: ItemsView, separator: str = "/"):
    """Collect the attributes that belong to one variable.

    An attribute belongs to ``varname`` when its name begins with
    ``varname`` immediately followed by ``separator``.

    Returns a list of ``(name, value)`` pairs in the order they appear in
    ``items``; the names are returned in full, prefix included.
    """
    prefix = varname + separator
    selected = []
    for name, value in items:
        if name.startswith(prefix):
            selected.append((name, value))
    return selected
155+
156+
157+
# pylint: disable=R0902  # Too many instance attributes
# pylint: disable=R0912  # Too many branches
# pylint: disable=E1121  # too-many-function-args
class BoutAdiosBackendEntrypoint(BackendEntrypoint):
    """
    Backend for ".bp" folders based on the adios2 package.

    For more information about the underlying library, visit:
    https://adios2.readthedocs.io/en/stable

    See Also
    --------
    backends.AdiosStore
    """

    description = "Open ADIOS2 files/folders (.bp) using adios2 in Xarray"
    # NOTE(review): this URL points at the Zarr backend documentation; it is
    # kept unchanged here, but should probably point at this backend's docs.
    url = "https://docs.xarray.dev/en/stable/generated/xarray.backends.ZarrBackendEntrypoint.html"

    def __init__(self):
        # Handle of the most recently opened file. Closing a dataset does not
        # rely on this attribute (see open_dataset).
        self._fh = None

    @staticmethod
    def close():
        """Do nothing; kept only so existing references keep working.

        This function never had access to a file handle. Datasets returned
        by ``open_dataset`` close their own ADIOS file instead.
        """

    def guess_can_open(
        self,
        filename_or_obj: str | os.PathLike[Any] | BufferedIOBase | AbstractDataStore,
    ) -> bool:
        """Return True when ``filename_or_obj`` is a path ending in ``.bp``."""
        if isinstance(filename_or_obj, (str, os.PathLike)):
            # normpath drops a trailing separator, so "run.bp/" is still
            # recognised.
            _, ext = os.path.splitext(os.path.normpath(filename_or_obj))
            return ext == ".bp"

        return False

    def open_dataset(  # type: ignore[override]  # allow LSP violation, not supporting **kwargs
        self,
        filename_or_obj: str | os.PathLike[Any] | BufferedIOBase | AbstractDataStore,
        *,
        drop_variables: str | Iterable[str] | None = None,
    ) -> Dataset:
        """Open an ADIOS2 file and expose its variables as an xarray Dataset.

        Parameters
        ----------
        filename_or_obj
            Path of the ADIOS2 file/folder, passed to ``FileReader`` after
            ``_normalize_path``.
        drop_variables
            Name, or iterable of names, of variables to leave out.

        Returns
        -------
        Dataset
            Array variables are lazily indexed; scalar variables are read
            immediately. Closing the dataset closes the ADIOS file.
        """
        filename_or_obj = _normalize_path(filename_or_obj)

        # A bare string names exactly one variable; testing `name in "abc"`
        # would match substrings instead.
        if drop_variables is None:
            dropped = frozenset()
        elif isinstance(drop_variables, str):
            dropped = frozenset((drop_variables,))
        else:
            dropped = frozenset(drop_variables)

        fh = FileReader(filename_or_obj)
        self._fh = fh
        try:
            variables = fh.available_variables()
            attrs = fh.available_attributes()

            xvars = {}
            for varname, varinfo in variables.items():
                if varname in dropped:
                    continue
                xvars[varname] = self._make_variable(fh, varname, varinfo, attrs)

            # Whatever is left was not consumed as a per-variable attribute.
            # NOTE(review): these are only reported on stdout, as before;
            # consider attaching them to the Dataset instead.
            for attname, attinfo in attrs.items():
                print(f"{attinfo['Type']} {attname}\t = {attinfo['Value']}")

            ds = Dataset(xvars, None, None)
        except Exception:
            # Do not leak the open file when building the dataset fails.
            fh.close()
            raise

        # Bind the close callback to this file's own handle, so each dataset
        # closes the file it was read from.
        ds.set_close(fh.close)
        return ds

    @staticmethod
    def _make_variable(fh, varname, varinfo, attrs):
        """Build the xarray Variable for one ADIOS variable.

        Attributes named ``<varname>/...`` are read, removed from ``attrs``
        and attached to the variable, except ``__xarray_dimensions__`` which
        supplies the dimension names.
        """
        shape_str = varinfo["Shape"].split(", ")
        if shape_str[0]:
            shape_list = [int(extent) for extent in shape_str]
        else:
            # An empty "Shape" string means a scalar variable.
            shape_list = []
            shape_str = []
        steps = int(varinfo["AvailableStepsCount"])

        prefix_len = len(varname) + 1  # include /
        dims_attr = varname + "/__xarray_dimensions__"
        dims = None
        xattrs = {}
        for aname, _ in attrs_of_var(varname, attrs.items(), "/"):
            attr_value = fh.read_attribute(aname)
            if aname == dims_attr:
                dims = attr_value
            else:
                xattrs[aname[prefix_len:]] = attr_value
            attrs.pop(aname)

        if dims is None:
            # No stored names: fall back to the extents as dimension names.
            dims = shape_str
        elif isinstance(dims, str):
            # A single dimension name may come back as a bare string.
            dims = [dims]
        else:
            dims = list(dims)

        if shape_list:
            if steps > 1:
                # Steps become a leading "t" dimension.
                shape_list.insert(0, steps)
                dims.insert(0, "t")
            nptype = np.dtype(adios_to_numpy_type[varinfo["Type"]])
            xdata = indexing.LazilyIndexedArray(
                BoutADIOSBackendArray(shape_list, nptype, None, fh, varname)
            )
            return Variable(dims, xdata, attrs=xattrs, encoding={"dtype": nptype})

        if steps > 1:
            # Scalar written at several steps: read all steps eagerly as a
            # 1-D variable along "t".
            avar = fh.inquire_variable(varname)
            avar.set_step_selection([0, avar.steps()])
            data = fh.read(avar)
            return Variable("t", data, attrs=xattrs, encoding={"dtype": data.dtype})

        # Plain scalar (numeric or string): read eagerly.
        data = fh.read(varname)
        return Variable([], data, attrs=xattrs, encoding=None)

0 commit comments

Comments
 (0)