|
| 1 | +"""License: |
| 2 | + Distributed under the OSI-approved Apache License, Version 2.0. See |
| 3 | + accompanying file Copyright.txt for details. |
| 4 | +""" |
| 5 | + |
| 6 | +from __future__ import annotations |
| 7 | + |
| 8 | +import os |
| 9 | + |
| 10 | +# import warnings |
| 11 | + |
| 12 | +from collections.abc import Iterable |
| 13 | +from typing import TYPE_CHECKING, Any, ItemsView |
| 14 | + |
| 15 | +import numpy as np |
| 16 | +from adios2 import FileReader |
| 17 | + |
| 18 | +from xarray import Dataset, Variable |
| 19 | +from xarray.backends.common import ( |
| 20 | + BackendArray, |
| 21 | + BackendEntrypoint, |
| 22 | + _normalize_path, |
| 23 | +) |
| 24 | + |
| 25 | +from xarray.core import indexing |
| 26 | + |
| 27 | +if TYPE_CHECKING: |
| 28 | + from io import BufferedIOBase |
| 29 | + from xarray.backends.common import AbstractDataStore |
| 30 | + |
| 31 | +# need some special secret attributes to tell us the dimensions |
| 32 | +DIMENSION_KEY = "time_dimension" |
| 33 | + |
| 34 | +adios_to_numpy_type = { |
| 35 | + "char": np.char, |
| 36 | + "int8_t": np.int8, |
| 37 | + "int16_t": np.int16, |
| 38 | + "int32_t": np.int32, |
| 39 | + "int64_t": np.int64, |
| 40 | + "uint8_t": np.uint8, |
| 41 | + "uint16_t": np.uint16, |
| 42 | + "uint32_t": np.uint32, |
| 43 | + "uint64_t": np.uint64, |
| 44 | + "float": float, |
| 45 | + "double": np.double, |
| 46 | + "long double": np.longdouble, |
| 47 | + "float complex": np.complex64, |
| 48 | + "double complex": np.complex128, |
| 49 | + "string": np.char, |
| 50 | +} |
| 51 | + |
| 52 | + |
| 53 | +class BoutADIOSBackendArray(BackendArray): |
| 54 | + """ADIOS2 backend for lazily indexed arrays""" |
| 55 | + |
| 56 | + def __init__( |
| 57 | + self, shape: list, dtype: np.dtype, lock, adiosfile: FileReader, varname: str |
| 58 | + ): |
| 59 | + self.shape = shape |
| 60 | + self.dtype = dtype |
| 61 | + self.lock = lock |
| 62 | + self.fh = adiosfile |
| 63 | + self.varname = varname |
| 64 | + self.adiosvar = self.fh.inquire_variable(varname) |
| 65 | + self.steps = self.adiosvar.steps() |
| 66 | + # print(f"BoutADIOSBackendArray.__init__: {dtype} {varname} {shape} {dtype.itemsize}") |
| 67 | + |
| 68 | + def __getitem__(self, key: indexing.ExplicitIndexer) -> np.typing.ArrayLike: |
| 69 | + # print(f"**** BoutADIOSBackendArray.__getitem__: {self.varname} key = {key}") |
| 70 | + |
| 71 | + return indexing.explicit_indexing_adapter( |
| 72 | + key, |
| 73 | + self.shape, |
| 74 | + indexing.IndexingSupport.BASIC, |
| 75 | + self._raw_indexing_method, |
| 76 | + ) |
| 77 | + |
| 78 | + def _raw_indexing_method(self, key: tuple) -> np.typing.ArrayLike: |
| 79 | + # print(f"****BoutADIOSBackendArray._raw_indexing_method: {self.varname} " |
| 80 | + # f"key = {key} steps = {self.steps}") |
| 81 | + # print(f" data shape {data.shape}") |
| 82 | + |
| 83 | + # thread safe method that access to data on disk needed because |
| 84 | + # adios is not thread safe even for reading |
| 85 | + # with self.lock: |
| 86 | + start = [] |
| 87 | + count = [] |
| 88 | + dimid = 0 |
| 89 | + first_sl = True |
| 90 | + for sl in key: |
| 91 | + if isinstance(sl, slice): |
| 92 | + if sl.start is None: |
| 93 | + st = 0 |
| 94 | + else: |
| 95 | + st = sl.start |
| 96 | + |
| 97 | + if sl.stop is None: |
| 98 | + ct = self.shape[dimid] - st |
| 99 | + else: |
| 100 | + ct = sl.stop - st |
| 101 | + |
| 102 | + if sl.step != 1 and sl.step is not None: |
| 103 | + msg = ( |
| 104 | + "The indexing operation with step != 1 you are attempting to perform " |
| 105 | + "is not valid on ADIOS2.Variable object. " |
| 106 | + ) |
| 107 | + raise IndexError(msg) |
| 108 | + else: |
| 109 | + st = sl - 1 |
| 110 | + ct = 1 |
| 111 | + |
| 112 | + if self.steps > 1 and first_sl: # key[0] is the step selection |
| 113 | + # print(f" data step selection start = {st} count = {ct}") |
| 114 | + self.adiosvar.set_step_selection([st, ct]) |
| 115 | + else: |
| 116 | + start.append(st) |
| 117 | + count.append(ct) |
| 118 | + dimid += 1 |
| 119 | + first_sl = False |
| 120 | + # print(f" data selection start = {start} count = {count}") |
| 121 | + self.adiosvar.set_selection([start, count]) |
| 122 | + |
| 123 | + data = self.fh.read(self.adiosvar) |
| 124 | + if self.steps > 1: |
| 125 | + # ADIOS does not have time dimension. Read returns n-dim array |
| 126 | + # with the steps included in the first dimension |
| 127 | + dim0 = int(data.shape[0] / self.steps) |
| 128 | + if data.shape[0] % self.steps != 0: |
| 129 | + print( |
| 130 | + f"ERROR in BoutADIOSBackendArray: first dimension problem " |
| 131 | + f"with handling steps. Variable name={self.varname} " |
| 132 | + f"shape={data.shape}, steps={self.steps}" |
| 133 | + ) |
| 134 | + data = data.reshape((self.steps, dim0) + data.shape[1:]) |
| 135 | + return data |
| 136 | + |
| 137 | + |
| 138 | +def attrs_of_var(varname: str, items: ItemsView, separator: str = "/"): |
| 139 | + """Return attributes whose name starts with a variable's name""" |
| 140 | + return [(key, value) for key, value in items if key.startswith(varname + separator)] |
| 141 | + |
| 142 | + |
| 143 | +# pylint: disable=R0902 # Too many instance attributes |
| 144 | +# pylint: disable=R0912 # Too many branches |
| 145 | +# pylint: disable=E1121 # too-many-function-args |
| 146 | +class BoutAdiosBackendEntrypoint(BackendEntrypoint): |
| 147 | + """ |
| 148 | + Backend for ".bp" folders based on the adios2 package. |
| 149 | +
|
| 150 | + For more information about the underlying library, visit: |
| 151 | + https://adios2.readthedocs.io/en/stable |
| 152 | +
|
| 153 | + See Also |
| 154 | + -------- |
| 155 | + backends.AdiosStore |
| 156 | + """ |
| 157 | + |
| 158 | + description = "Open ADIOS2 files/folders (.bp) using adios2 in Xarray" |
| 159 | + url = "https://docs.xarray.dev/en/stable/generated/xarray.backends.ZarrBackendEntrypoint.html" |
| 160 | + |
| 161 | + def __init__(self): |
| 162 | + self._fh = None |
| 163 | + |
| 164 | + def close(): |
| 165 | + """Close the ADIOS file""" |
| 166 | + # print("BoutAdiosBackendEntrypoint.close() called") |
| 167 | + # Note that this is a strange method without 'self', so we cannot close the file because |
| 168 | + # we don't have any handle to it |
| 169 | + # if self._fh is not None: |
| 170 | + # self._fh.close() |
| 171 | + |
| 172 | + def guess_can_open( |
| 173 | + self, |
| 174 | + filename_or_obj: str | os.PathLike[Any] | BufferedIOBase | AbstractDataStore, |
| 175 | + ) -> bool: |
| 176 | + if isinstance(filename_or_obj, (str, os.PathLike)): |
| 177 | + _, ext = os.path.splitext(filename_or_obj) |
| 178 | + return ext in {".bp"} |
| 179 | + |
| 180 | + return False |
| 181 | + |
| 182 | + def open_dataset( # type: ignore[override] # allow LSP violation, not supporting **kwargs |
| 183 | + self, |
| 184 | + filename_or_obj: str | os.PathLike[Any] | BufferedIOBase | AbstractDataStore, |
| 185 | + *, |
| 186 | + # mask_and_scale=True, |
| 187 | + # decode_times=True, |
| 188 | + # concat_characters=True, |
| 189 | + # decode_coords=True, |
| 190 | + drop_variables: str | Iterable[str] | None = None, |
| 191 | + # use_cftime=None, |
| 192 | + # decode_timedelta=None, |
| 193 | + # group=None, |
| 194 | + # mode="r", |
| 195 | + # synchronizer=None, |
| 196 | + # consolidated=None, |
| 197 | + # chunk_store=None, |
| 198 | + # storage_options=None, |
| 199 | + # stacklevel=3, |
| 200 | + # adios_version=None, |
| 201 | + ) -> Dataset: |
| 202 | + filename_or_obj = _normalize_path(filename_or_obj) |
| 203 | + # print(f"BoutAdiosBackendEntrypoint: path = {filename_or_obj} type = {type(filename_or_obj)}") |
| 204 | + |
| 205 | + # if isinstance(filename_or_obj, os.PathLike): |
| 206 | + # print(f" os.PathLike: {os.fspath(filename_or_obj)}") |
| 207 | + # |
| 208 | + # if isinstance(filename_or_obj, str): |
| 209 | + # print(f" str: {os.path.abspath(os.path.expanduser(filename_or_obj))}") |
| 210 | + |
| 211 | + # if isinstance(filename_or_obj, BufferedIOBase): |
| 212 | + # raise ValueError("ADIOS2 does not support BufferedIOBase input") |
| 213 | + # |
| 214 | + # if isinstance(filename_or_obj, AbstractDataStore): |
| 215 | + # raise ValueError("ADIOS2 does not support AbstractDataStore input") |
| 216 | + |
| 217 | + self._fh = FileReader(filename_or_obj) |
| 218 | + vars = self._fh.available_variables() |
| 219 | + attrs = self._fh.available_attributes() |
| 220 | + attr_items = attrs.items() |
| 221 | + # print(f"BoutAdiosBackendEntrypoint: {len(vars)} variables, {len(attrs)} attributes") |
| 222 | + xvars = {} |
| 223 | + |
| 224 | + for varname, varinfo in vars.items(): |
| 225 | + if drop_variables is not None and varname in drop_variables: |
| 226 | + continue |
| 227 | + shape_str = varinfo["Shape"].split(", ") |
| 228 | + if shape_str[0]: |
| 229 | + shape_list = list(map(int, shape_str)) |
| 230 | + else: |
| 231 | + shape_list = [] |
| 232 | + shape_str = [] |
| 233 | + steps = int(varinfo["AvailableStepsCount"]) |
| 234 | + # print(f"{varinfo['Type']} {varname}\t {shape_list}") |
| 235 | + varattrs = attrs_of_var(varname, attr_items, "/") |
| 236 | + dims = None |
| 237 | + vlen = len(varname) + 1 # include / |
| 238 | + xattrs = {} |
| 239 | + for aname, ainfo in varattrs: |
| 240 | + # print(f"\t{ainfo['Type']} {aname}\t = {ainfo['Value']}") |
| 241 | + attr_value = self._fh.read_attribute(aname) |
| 242 | + if aname == varname + "/__xarray_dimensions__": |
| 243 | + dims = attr_value |
| 244 | + # print(f"\t\tDIMENSIONS = {dims}") |
| 245 | + else: |
| 246 | + xattrs[aname[vlen:]] = attr_value |
| 247 | + attrs.pop(aname) |
| 248 | + # print(f"\txattrs = {xattrs}") |
| 249 | + |
| 250 | + # Create the xarray variable |
| 251 | + if dims is None: |
| 252 | + dims = shape_str |
| 253 | + if shape_list != []: |
| 254 | + # for i in range(len(shape_str)): |
| 255 | + # shape_str[i] = "d" + shape_str[i] |
| 256 | + if steps > 1: |
| 257 | + shape_list.insert(0, steps) |
| 258 | + dims.insert(0, "t") |
| 259 | + # print(f"\tAdd time to shape {shape_list} {dims}") |
| 260 | + nptype = np.dtype(adios_to_numpy_type[varinfo["Type"]]) |
| 261 | + xdata = indexing.LazilyIndexedArray( |
| 262 | + BoutADIOSBackendArray(shape_list, nptype, None, self._fh, varname) |
| 263 | + ) |
| 264 | + # print(f"\tDefine VARIABLE {varname} with dims {dims}") |
| 265 | + xvar = Variable(dims, xdata, attrs=xattrs, encoding={"dtype": nptype}) |
| 266 | + # print(f"{xvar.dtype} {xvar.attrs["name"]} {xvar.dims} {xvar.encoding}") |
| 267 | + else: |
| 268 | + if steps > 1: |
| 269 | + avar = self._fh.inquire_variable(varname) |
| 270 | + avar.set_step_selection([0, avar.steps()]) |
| 271 | + data = self._fh.read(avar) |
| 272 | + # print(f"\tCreate timed scalar variable {varname}") |
| 273 | + xvar = Variable( |
| 274 | + "t", data, attrs=xattrs, encoding={"dtype": data.dtype} |
| 275 | + ) |
| 276 | + else: |
| 277 | + data = self._fh.read(varname) |
| 278 | + if varinfo["Type"] == "string": |
| 279 | + # print(f"\tCreate string scalar variable {varname}") |
| 280 | + xvar = Variable([], data, attrs=xattrs, encoding=None) |
| 281 | + else: |
| 282 | + # print(f"\tCreate scalar variable {varname}") |
| 283 | + xvar = Variable([], data, attrs=xattrs, encoding=None) |
| 284 | + xvars[varname] = xvar |
| 285 | + # print(f"--- {xvar}") |
| 286 | + |
| 287 | + for attname, attinfo in attrs.items(): |
| 288 | + print(f"{attinfo['Type']} {attname}\t = {attinfo['Value']}") |
| 289 | + |
| 290 | + ds = Dataset(xvars, None, None) |
| 291 | + ds.set_close(BoutAdiosBackendEntrypoint.close) |
| 292 | + return ds |
0 commit comments