@@ -68,11 +68,13 @@ def get_hdf5_attrs(
6868 dataset : Optional [str ] = None ,
6969 swmr : bool = SWMR_DEFAULT ,
7070 libver : str = "latest" ,
71- ** kwargs : Optional [Any ] ,
71+ locking : Optional [Union [ bool , str ]] = None ,
7272) -> JSON :
7373 """Get attributes of an HDF5 dataset"""
7474 file_path = path_from_uri (file_uri )
75- with h5open (file_path , dataset = dataset , swmr = swmr , libver = libver , ** kwargs ) as node :
75+ with h5open (
76+ file_path , dataset = dataset , swmr = swmr , libver = libver , locking = locking
77+ ) as node :
7678 d = dict (getattr (node , "attrs" , {}))
7779 for k , v in d .items ():
7880 # Convert any bytes to str.
@@ -138,6 +140,7 @@ def lazy_load_hdf5_array(
138140 dataset : Optional [str ] = None ,
139141 swmr : bool = SWMR_DEFAULT ,
140142 libver : str = "latest" ,
143+ locking : Optional [Union [bool , str ]] = None ,
141144 ) -> dask .array .Array :
142145 """Lazily load arrays from possibly multiple HDF5 files and concatenate them along the first axis
143146
@@ -153,17 +156,21 @@ def lazy_load_hdf5_array(
153156 Whether to open the files in single-writer multiple-reader mode
154157 libver : str
155158 The HDF5 library version to use
 159+ locking : bool or str, optional
 160+ Whether to use file locking when accessing the files
156161 """
157162
158163 # Define helper functions for reading and getting specs of HDF5 arrays with dask.delayed
159164 def _read_hdf5_array (fpath : Union [str , Path ]) -> NDArray [Any ]:
160- f = h5py .File (fpath , "r" , swmr = swmr , libver = libver )
165+ f = h5py .File (fpath , "r" , swmr = swmr , libver = libver , locking = locking )
161166 return f [dataset ] if dataset else f
162167
163168 def _get_hdf5_specs (
164169 fpath : Union [str , Path ]
165170 ) -> Tuple [Tuple [int , ...], Union [Tuple [int , ...], None ], numpy .dtype ]:
166- with h5open (fpath , dataset , swmr = swmr , libver = libver ) as ds :
171+ with h5open (
172+ fpath , dataset , swmr = swmr , libver = libver , locking = locking
173+ ) as ds :
167174 result = ds .shape , ds .chunks , ds .dtype
168175 return result
169176
@@ -187,7 +194,11 @@ def _get_hdf5_specs(
187194 if check_str_dtype .length is None :
188195 # TODO: refactor and test
189196 with h5open (
190- file_paths [0 ], dataset = dataset , swmr = swmr , libver = libver
197+ file_paths [0 ],
198+ dataset = dataset ,
199+ swmr = swmr ,
200+ libver = libver ,
201+ locking = locking ,
191202 ) as value :
192203 dataset_names = value .file [value .file .name + "/" + dataset ][...][()]
193204 if value .size == 1 :
@@ -226,7 +237,7 @@ def from_catalog(
226237 squeeze : Optional [bool ] = False ,
227238 swmr : bool = SWMR_DEFAULT ,
228239 libver : str = "latest" ,
229- ** kwargs : Optional [Any ] ,
240+ locking : Optional [Union [ bool , str ]] = None ,
230241 ) -> "HDF5ArrayAdapter" :
231242 structure = data_source .structure
232243 assets = data_source .assets
@@ -236,7 +247,7 @@ def from_catalog(
236247 file_paths = [path_from_uri (uri ) for uri in data_uris ]
237248
238249 array = cls .lazy_load_hdf5_array (
239- * file_paths , dataset = dataset , swmr = swmr , libver = libver
250+ * file_paths , dataset = dataset , swmr = swmr , libver = libver , locking = locking
240251 )
241252
242253 if slice :
@@ -263,7 +274,9 @@ def from_catalog(
263274 # Pull additional metadata from the file attributes
264275 metadata = copy .deepcopy (node .metadata_ )
265276 metadata .update (
266- get_hdf5_attrs (data_uris [0 ], dataset , swmr = swmr , libver = libver , ** kwargs )
277+ get_hdf5_attrs (
278+ data_uris [0 ], dataset , swmr = swmr , libver = libver , locking = locking
279+ )
267280 )
268281
269282 return cls (
@@ -282,12 +295,12 @@ def from_uris(
282295 squeeze : bool = False ,
283296 swmr : bool = SWMR_DEFAULT ,
284297 libver : str = "latest" ,
285- ** kwargs : Optional [Any ] ,
298+ locking : Optional [Union [ bool , str ]] = None ,
286299 ) -> "HDF5ArrayAdapter" :
287300 file_paths = [path_from_uri (uri ) for uri in data_uris ]
288301
289302 array = cls .lazy_load_hdf5_array (
290- * file_paths , dataset = dataset , swmr = swmr , libver = libver
303+ * file_paths , dataset = dataset , swmr = swmr , libver = libver , locking = locking
291304 )
292305
293306 # Apply slice and squeeze operations, if specified
@@ -301,7 +314,7 @@ def from_uris(
301314 # Construct the structure and pull additional metadata from the file attributes
302315 structure = ArrayStructure .from_array (array )
303316 metadata = get_hdf5_attrs (
304- data_uris [0 ], dataset , swmr = swmr , libver = libver , ** kwargs
317+ data_uris [0 ], dataset , swmr = swmr , libver = libver , locking = locking
305318 )
306319
307320 return cls (array , structure , metadata = metadata )
@@ -361,7 +374,7 @@ def __init__(
361374 self ._tree : dict [str , Any ] = tree # type: ignore
362375 self .uris = data_uris
363376 self .dataset = dataset # Referenced to the root of the file
364- self ._kwargs = kwargs # e.g. swmr, libver, etc.
377+ self ._kwargs = kwargs # e.g. swmr, libver, locking, etc.
365378 super ().__init__ (
366379 structure = ContainerStructure (keys = list (self ._tree .keys ())),
367380 metadata = metadata ,
@@ -375,13 +388,11 @@ def from_catalog(
375388 data_source : DataSource [Union [ArrayStructure , None ]],
376389 node : Node ,
377390 / ,
378- dataset : Optional [ Union [str , list [str ]]] = None ,
391+ dataset : Union [str , list [str ]] = "/" ,
379392 swmr : bool = SWMR_DEFAULT ,
380393 libver : str = "latest" ,
381- ** kwargs : Optional [Any ] ,
394+ locking : Optional [Union [ bool , str ]] = None ,
382395 ) -> Union ["HDF5Adapter" , HDF5ArrayAdapter ]:
383- # Convert the dataset representation (for backward compatibility)
384- dataset = dataset or kwargs .get ("path" ) or []
385396 if not isinstance (dataset , str ):
386397 dataset = "/" .join (dataset )
387398
@@ -393,7 +404,7 @@ def from_catalog(
393404 dataset = dataset ,
394405 swmr = swmr ,
395406 libver = libver ,
396- ** kwargs ,
407+ locking = locking ,
397408 )
398409
399410 # Initialize adapter for the entire HDF5 tree
@@ -405,7 +416,9 @@ def from_catalog(
405416 ast .data_uri for ast in assets if ast .parameter == "data_uris"
406417 ] or [assets [0 ].data_uri ]
407418 file_path = path_from_uri (data_uris [0 ])
408- with h5open (file_path , dataset , swmr = swmr , libver = libver ) as file :
419+ with h5open (
420+ file_path , dataset , swmr = swmr , libver = libver , locking = locking
421+ ) as file :
409422 tree = parse_hdf5_tree (file )
410423
411424 if tree == HDF5_DATASET :
@@ -422,7 +435,7 @@ def from_catalog(
422435 specs = node .specs ,
423436 swmr = swmr ,
424437 libver = libver ,
425- ** kwargs ,
438+ locking = locking ,
426439 )
427440
428441 @classmethod
@@ -432,19 +445,19 @@ def from_uris(
432445 dataset : Optional [str ] = None ,
433446 swmr : bool = SWMR_DEFAULT ,
434447 libver : str = "latest" ,
435- ** kwargs : Optional [Any ] ,
448+ locking : Optional [Union [ bool , str ]] = None ,
436449 ) -> Union ["HDF5Adapter" , HDF5ArrayAdapter ]:
437450 fpath = path_from_uri (data_uris [0 ])
438- with h5open (fpath , dataset , swmr = swmr , libver = libver ) as file :
451+ with h5open (fpath , dataset , swmr = swmr , libver = libver , locking = locking ) as file :
439452 tree = parse_hdf5_tree (file )
440453
441454 if tree == HDF5_DATASET :
442455 return HDF5ArrayAdapter .from_uris (
443- * data_uris , dataset = dataset , swmr = swmr , libver = libver , ** kwargs # type: ignore
456+ * data_uris , dataset = dataset , swmr = swmr , libver = libver , locking = locking
444457 )
445458
446459 return cls (
447- tree , * data_uris , dataset = dataset , swmr = swmr , libver = libver , ** kwargs
460+ tree , * data_uris , dataset = dataset , swmr = swmr , libver = libver , locking = locking
448461 )
449462
450463 def __repr__ (self ) -> str :
0 commit comments