Commit 75b9fd2

genematx authored and jmaruland committed
Add 'locking' parameter to hdf5 adapter (bluesky#1164)
* ENH: add 'locking' parameter to hdf5 adapter
* MNT: update changelog
* TYP: allow str in the 'locking' kwarg
1 parent dbbc4b9 · commit 75b9fd2
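In short, every adapter entry point now threads an explicit `locking` keyword down to `h5py.File` instead of relying on `**kwargs`. A rough usage sketch (the file URI and dataset path here are hypothetical, chosen only for illustration):

```python
from tiled.adapters.hdf5 import HDF5Adapter

# Hypothetical URI and dataset path, for illustration only.
adapter = HDF5Adapter.from_uris(
    "file:///data/example.h5",
    dataset="entry/data",
    locking=False,  # passed through to h5py.File; disables HDF5 file locking
)
```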

File tree

2 files changed (+40, -23 lines)

CHANGELOG.md

Lines changed: 4 additions & 0 deletions
@@ -9,6 +9,10 @@ Write the date in place of the "Unreleased" in the case a new version is release
 
 ## Unreleased
 
+### Added
+
+- `locking` key-word argument in HDFAdapter and HDF5Adapter.
+
 ### Fixed
 
 - Column names in `TableStructure` are explicitly converted to strings.

tiled/adapters/hdf5.py

Lines changed: 36 additions & 23 deletions
@@ -68,11 +68,13 @@ def get_hdf5_attrs(
     dataset: Optional[str] = None,
     swmr: bool = SWMR_DEFAULT,
     libver: str = "latest",
-    **kwargs: Optional[Any],
+    locking: Optional[Union[bool, str]] = None,
 ) -> JSON:
     """Get attributes of an HDF5 dataset"""
     file_path = path_from_uri(file_uri)
-    with h5open(file_path, dataset=dataset, swmr=swmr, libver=libver, **kwargs) as node:
+    with h5open(
+        file_path, dataset=dataset, swmr=swmr, libver=libver, locking=locking
+    ) as node:
         d = dict(getattr(node, "attrs", {}))
         for k, v in d.items():
             # Convert any bytes to str.
@@ -138,6 +140,7 @@ def lazy_load_hdf5_array(
     dataset: Optional[str] = None,
     swmr: bool = SWMR_DEFAULT,
     libver: str = "latest",
+    locking: Optional[Union[bool, str]] = None,
 ) -> dask.array.Array:
     """Lazily load arrays from possibly multiple HDF5 files and concatenate them along the first axis
 
@@ -153,17 +156,21 @@
         Whether to open the files in single-writer multiple-reader mode
     libver : str
         The HDF5 library version to use
+    locking : bool
+        Whether to use file locking when accessing the files
     """
 
     # Define helper functions for reading and getting specs of HDF5 arrays with dask.delayed
     def _read_hdf5_array(fpath: Union[str, Path]) -> NDArray[Any]:
-        f = h5py.File(fpath, "r", swmr=swmr, libver=libver)
+        f = h5py.File(fpath, "r", swmr=swmr, libver=libver, locking=locking)
         return f[dataset] if dataset else f
 
     def _get_hdf5_specs(
         fpath: Union[str, Path]
     ) -> Tuple[Tuple[int, ...], Union[Tuple[int, ...], None], numpy.dtype]:
-        with h5open(fpath, dataset, swmr=swmr, libver=libver) as ds:
+        with h5open(
+            fpath, dataset, swmr=swmr, libver=libver, locking=locking
+        ) as ds:
             result = ds.shape, ds.chunks, ds.dtype
         return result
 
@@ -187,7 +194,11 @@ def _get_hdf5_specs(
     if check_str_dtype.length is None:
         # TODO: refactor and test
         with h5open(
-            file_paths[0], dataset=dataset, swmr=swmr, libver=libver
+            file_paths[0],
+            dataset=dataset,
+            swmr=swmr,
+            libver=libver,
+            locking=locking,
         ) as value:
             dataset_names = value.file[value.file.name + "/" + dataset][...][()]
             if value.size == 1:
@@ -226,7 +237,7 @@ def from_catalog(
         squeeze: Optional[bool] = False,
         swmr: bool = SWMR_DEFAULT,
         libver: str = "latest",
-        **kwargs: Optional[Any],
+        locking: Optional[Union[bool, str]] = None,
     ) -> "HDF5ArrayAdapter":
         structure = data_source.structure
         assets = data_source.assets
@@ -236,7 +247,7 @@ def from_catalog(
         file_paths = [path_from_uri(uri) for uri in data_uris]
 
         array = cls.lazy_load_hdf5_array(
-            *file_paths, dataset=dataset, swmr=swmr, libver=libver
+            *file_paths, dataset=dataset, swmr=swmr, libver=libver, locking=locking
         )
 
         if slice:
@@ -263,7 +274,9 @@ def from_catalog(
         # Pull additional metadata from the file attributes
         metadata = copy.deepcopy(node.metadata_)
         metadata.update(
-            get_hdf5_attrs(data_uris[0], dataset, swmr=swmr, libver=libver, **kwargs)
+            get_hdf5_attrs(
+                data_uris[0], dataset, swmr=swmr, libver=libver, locking=locking
+            )
         )
 
         return cls(
@@ -282,12 +295,12 @@ def from_uris(
         squeeze: bool = False,
         swmr: bool = SWMR_DEFAULT,
         libver: str = "latest",
-        **kwargs: Optional[Any],
+        locking: Optional[Union[bool, str]] = None,
     ) -> "HDF5ArrayAdapter":
         file_paths = [path_from_uri(uri) for uri in data_uris]
 
         array = cls.lazy_load_hdf5_array(
-            *file_paths, dataset=dataset, swmr=swmr, libver=libver
+            *file_paths, dataset=dataset, swmr=swmr, libver=libver, locking=locking
         )
 
         # Apply slice and squeeze operations, if specified
@@ -301,7 +314,7 @@
         # Construct the structure and pull additional metadata from the file attributes
         structure = ArrayStructure.from_array(array)
         metadata = get_hdf5_attrs(
-            data_uris[0], dataset, swmr=swmr, libver=libver, **kwargs
+            data_uris[0], dataset, swmr=swmr, libver=libver, locking=locking
         )
 
         return cls(array, structure, metadata=metadata)
@@ -361,7 +374,7 @@ def __init__(
         self._tree: dict[str, Any] = tree  # type: ignore
         self.uris = data_uris
         self.dataset = dataset  # Referenced to the root of the file
-        self._kwargs = kwargs  # e.g. swmr, libver, etc.
+        self._kwargs = kwargs  # e.g. swmr, libver, locking, etc.
         super().__init__(
             structure=ContainerStructure(keys=list(self._tree.keys())),
             metadata=metadata,
@@ -375,13 +388,11 @@ def from_catalog(
         data_source: DataSource[Union[ArrayStructure, None]],
         node: Node,
         /,
-        dataset: Optional[Union[str, list[str]]] = None,
+        dataset: Union[str, list[str]] = "/",
         swmr: bool = SWMR_DEFAULT,
         libver: str = "latest",
-        **kwargs: Optional[Any],
+        locking: Optional[Union[bool, str]] = None,
     ) -> Union["HDF5Adapter", HDF5ArrayAdapter]:
-        # Convert the dataset representation (for backward compatibility)
-        dataset = dataset or kwargs.get("path") or []
         if not isinstance(dataset, str):
             dataset = "/".join(dataset)
 
@@ -393,7 +404,7 @@
                 dataset=dataset,
                 swmr=swmr,
                 libver=libver,
-                **kwargs,
+                locking=locking,
             )
 
         # Initialize adapter for the entire HDF5 tree
@@ -405,7 +416,9 @@
             ast.data_uri for ast in assets if ast.parameter == "data_uris"
         ] or [assets[0].data_uri]
         file_path = path_from_uri(data_uris[0])
-        with h5open(file_path, dataset, swmr=swmr, libver=libver) as file:
+        with h5open(
+            file_path, dataset, swmr=swmr, libver=libver, locking=locking
+        ) as file:
             tree = parse_hdf5_tree(file)
 
         if tree == HDF5_DATASET:
@@ -422,7 +435,7 @@
             specs=node.specs,
             swmr=swmr,
             libver=libver,
-            **kwargs,
+            locking=locking,
         )
 
     @classmethod
@@ -432,19 +445,19 @@ def from_uris(
         dataset: Optional[str] = None,
         swmr: bool = SWMR_DEFAULT,
         libver: str = "latest",
-        **kwargs: Optional[Any],
+        locking: Optional[Union[bool, str]] = None,
     ) -> Union["HDF5Adapter", HDF5ArrayAdapter]:
         fpath = path_from_uri(data_uris[0])
-        with h5open(fpath, dataset, swmr=swmr, libver=libver) as file:
+        with h5open(fpath, dataset, swmr=swmr, libver=libver, locking=locking) as file:
             tree = parse_hdf5_tree(file)
 
         if tree == HDF5_DATASET:
             return HDF5ArrayAdapter.from_uris(
-                *data_uris, dataset=dataset, swmr=swmr, libver=libver, **kwargs  # type: ignore
+                *data_uris, dataset=dataset, swmr=swmr, libver=libver, locking=locking
             )
 
         return cls(
-            tree, *data_uris, dataset=dataset, swmr=swmr, libver=libver, **kwargs
+            tree, *data_uris, dataset=dataset, swmr=swmr, libver=libver, locking=locking
         )
 
     def __repr__(self) -> str:
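For reference, the `Optional[Union[bool, str]]` annotation mirrors the values recent versions of h5py (3.5+) accept for `locking` in `h5py.File`: `None` (defer to the HDF5 library default and the `HDF5_USE_FILE_LOCKING` environment variable), `True`, `False`, or `"best-effort"`. A minimal sketch of the underlying call, with a made-up path:

```python
import h5py

# Open read-only without taking an HDF5 file lock; useful on filesystems
# (e.g. NFS) where locking is unsupported or slow.
with h5py.File("/data/example.h5", "r", locking=False) as f:
    print(list(f))

# "best-effort": try to lock, but do not fail if the filesystem cannot.
with h5py.File("/data/example.h5", "r", locking="best-effort") as f:
    print(dict(f.attrs))
```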
