Skip to content

Commit ce51787

Browse files
Merge pull request #107 from zequihg50/main
Added partial support for compact datasets.
2 parents c78cc8d + df0f8ec commit ce51787

File tree

2 files changed

+73
-3
lines changed

2 files changed

+73
-3
lines changed

pyfive/h5d.py

Lines changed: 37 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -100,7 +100,7 @@ def __init__(self, dataobject, pseudo_chunking_size_MB=4):
100100
# throws a flake8 wobbly for Python<3.10; match is Py3.10+ syntax
101101
match self.layout_class: # noqa
102102
case 0: #compact storage
103-
raise NotImplementedError("Compact Storage")
103+
self._data = self._get_compact_data(dataobject)
104104
case 1: # contiguous storage
105105
self.data_offset, = struct.unpack_from('<Q', dataobject.msg_data, self.property_offset)
106106
case 2: # chunked storage
@@ -156,14 +156,14 @@ def read_direct_chunk(self, chunk_position, **kwargs):
156156
raise OSError("Chunk coordinates must lie on chunk boundaries")
157157
storeinfo = self._index[chunk_position]
158158
return storeinfo.filter_mask, self._get_raw_chunk(storeinfo)
159-
159+
160160
def get_data(self, args, fillvalue):
161161
""" Called by the dataset getitem method """
162162
dtype = self._dtype
163163
# throws a flake8 wobbly for Python<3.10; match is Py3.10+ syntax
164164
match self.layout_class: # noqa
165165
case 0: #compact storage
166-
raise NotImplementedError("Compact Storage")
166+
return self._read_compact_data(args, fillvalue)
167167
case 1: # contiguous storage
168168
if self.data_offset == UNDEFINED_ADDRESS:
169169
# no storage is backing array, return an array of
@@ -375,6 +375,40 @@ def _get_contiguous_data(self, args):
375375
except UnsupportedOperation:
376376
return self._get_direct_from_contiguous(args)
377377

378+
def _get_compact_data(self, dataobject):
379+
data = None
380+
layout = None
381+
for msg in dataobject.msgs:
382+
if msg["type"] == 8:
383+
layout = msg
384+
break
385+
if layout is None:
386+
raise ValueError("No layout message in compact dataset?")
387+
byts = dataobject.msg_data[msg["offset_to_message"]:msg["offset_to_message"]+msg["size"]]
388+
layout_version = byts[0]
389+
if layout_version == 1 or layout_version == 2:
390+
raise NotImplementedError("Compact layout v1 and v2.")
391+
elif layout_version == 3 or layout_version == 4:
392+
size = int.from_bytes(byts[2:4], "little")
393+
data = byts[4:4+size]
394+
else:
395+
raise ValueError("Unknown layout version.")
396+
return data
397+
398+
def _read_compact_data(self, args, fillvalue):
399+
if self._data is None:
400+
if isinstance(self._dtype, tuple):
401+
dtype = np.array(fillvalue).dtype
402+
return np.full(self.shape, fillvalue, dtype=dtype)[args]
403+
else:
404+
view = np.frombuffer(
405+
self._data,
406+
dtype=self._dtype,
407+
).reshape(self.shape)
408+
# Create the sub-array
409+
result = view[args]
410+
return result
411+
378412

379413
def _get_direct_from_contiguous(self, args=None):
380414
"""

tests/test_compact.py

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
import os
2+
3+
import numpy as np
4+
import pytest
5+
from numpy.testing import assert_array_equal
6+
7+
import pyfive
8+
import h5py
9+
10+
11+
def test_compact_dataset_hdf5(name, data):
    # Open the file with pyfive and verify the compact dataset round-trips.
    with pyfive.File(name) as pfile:
        compact = pfile['compact']
        assert_array_equal(compact[...], data)
17+
18+
@pytest.fixture(scope='module')
def data():
    # Known values written into (and read back from) the compact dataset.
    return np.arange(1, 5, dtype=np.int32)
22+
23+
@pytest.fixture(scope='module')
def name(data):
    """ Create (once per module) an HDF5 file containing a compact dataset.

    Uses the h5py low-level API because the high-level API does not expose
    the compact storage layout directly.

    :param data: numpy array fixture to write into the dataset
    :return: path of the created file
    """
    name = os.path.join(os.path.dirname(__file__), 'compact.hdf5')

    # Context manager ensures the file handle is closed even if one of the
    # low-level dataset-creation calls raises.
    with h5py.File(name, 'w', libver='earliest') as f:
        dtype = h5py.h5t.NATIVE_INT32
        space = h5py.h5s.create_simple(data.shape)
        dcpl = h5py.h5p.create(h5py.h5p.DATASET_CREATE)
        dcpl.set_layout(h5py.h5d.COMPACT)
        dset_id = h5py.h5d.create(f.id, b"compact", dtype, space, dcpl=dcpl)
        dset_id.write(h5py.h5s.ALL, h5py.h5s.ALL, data)

    return name

0 commit comments

Comments
 (0)