From 9bf85babdfbb90bcfaa4466e646e100baa84d07d Mon Sep 17 00:00:00 2001
From: Bryan Lawrence
Date: Tue, 21 Oct 2025 14:33:15 +0100
Subject: [PATCH 01/16] Calculate b-tree range

---
 pyfive/h5d.py | 22 ++++++++++++++++++++++
 1 file changed, 22 insertions(+)

diff --git a/pyfive/h5d.py b/pyfive/h5d.py
index 511ff80..b1b6895 100644
--- a/pyfive/h5d.py
+++ b/pyfive/h5d.py
@@ -249,6 +249,22 @@ def index(self):
             raise ValueError(f'No chunk index available for HDF layout class {self.layout_class}')
         else:
             return self._index
+
+    ##### This property is made available to help understand object store performance
+    @property
+    def btree_range(self):
+        """ A tuple with the addresses of the first b-tree node
+        for this variable, and the address of the furthest away node
+        (which may not be the last one in the chunk index). This property
+        may be of use in understanding the read performance of chunked
+        data in object stores. ``btree_range`` is a ``pyfive`` API extension.
+        """
+        if self._index is None:
+            raise ValueError(f'No b-tree available for HDF layout class {self.layout_class}')
+        else:
+            return (self._btree_start, self._btree_end)
+
+
     #### The following method can be used to set pseudo chunking size after the
     #### file has been closed and before data transactions. This is pyfive specific
     def set_pseudo_chunk_size(self, newsize_MB):
@@ -311,7 +327,10 @@ def _build_index(self, dataobject):
 
         self._index = {}
         self._nthindex = []
+
         for node in chunk_btree.all_nodes[0]:
+            self._btree_start=node['addresses'][0]
+            self._btree_end=node['addresses'][0]
             for node_key, addr in zip(node['keys'], node['addresses']):
                 start = node_key['chunk_offset'][:-1]
                 key = start
@@ -319,6 +338,9 @@ def _build_index(self, dataobject):
                 filter_mask = node_key['filter_mask']
                 self._nthindex.append(key)
                 self._index[key] = StoreInfo(key, filter_mask, addr, size)
+                self._btree_end=max(addr,self._btree_end)
+
+
 
     def _get_contiguous_data(self, args, fillvalue):

From 70ad54ea0b7aa886590de5a1bde7b1d70f1d9d88 Mon Sep 17 00:00:00 2001
From: Bryan Lawrence
Date: Wed, 22 Oct 2025 13:38:06 +0100
Subject: [PATCH 02/16] Remove redundant chunk handling from btree.py (part of
 #131)

---
 pyfive/btree.py | 49 -------------------------------------------------
 1 file changed, 49 deletions(-)

diff --git a/pyfive/btree.py b/pyfive/btree.py
index 5d34258..7ce119b 100644
--- a/pyfive/btree.py
+++ b/pyfive/btree.py
@@ -151,55 +151,6 @@ def _read_node(self, offset, node_level):
         node['addresses'] = addresses
         return node
 
-    def construct_data_from_chunks(
-            self, chunk_shape, data_shape, dtype, filter_pipeline):
-        """ Build a complete data array from chunks. """
-        if isinstance(dtype, tuple):
-            true_dtype = tuple(dtype)
-            dtype_class = dtype[0]
-            if dtype_class == 'REFERENCE':
-                size = dtype[1]
-                if size != 8:
-                    raise NotImplementedError('Unsupported Reference type')
-                dtype = '<u8'
[the rest of the deleted construct_data_from_chunks body, the end of this patch,
and the commit hash line of PATCH 03 were lost when this section was extracted]

From: Bryan Lawrence
Date: Wed, 22 Oct 2025 16:47:58 +0100
Subject: [PATCH 03/16] Just making a note that we need to be careful about
 chunk indexing should we have a v3 layout in the future.

---
 pyfive/h5d.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/pyfive/h5d.py b/pyfive/h5d.py
index b1b6895..fb28a19 100644
--- a/pyfive/h5d.py
+++ b/pyfive/h5d.py
@@ -321,6 +321,10 @@ def _build_index(self, dataobject):
 
         logging.info(f'Building chunk index in pyfive {version("pyfive")}')
 
+    #FIXME: How do we know it's a V1 B-tree?
+ # There are potentially five different chunk indexing options according to + # https://docs.hdfgroup.org/archive/support/HDF5/doc/H5.format.html#AppendixC + chunk_btree = BTreeV1RawDataChunks( dataobject.fh, dataobject._chunk_address, dataobject._chunk_dims) From 1c9bef4a91a3f3f4ab3241ebe0ce7f66345faf34 Mon Sep 17 00:00:00 2001 From: Bryan Lawrence Date: Wed, 22 Oct 2025 16:48:56 +0100 Subject: [PATCH 04/16] First cut at supporting ncdump like behaviour. Doesn't have support for file or group attributes yet, or phony dimensions. --- .vscode/settings.json | 16 ++++++++ pyfive/__init__.py | 1 + pyfive/inspect.py | 83 +++++++++++++++++++++++++++++++++++++++++ pyfive/p5dump.py | 39 +++++++++++++++++++ pyproject.toml | 3 ++ tests/test_dump.py | 48 ++++++++++++++++++++++++ tests/test_mock_s3fs.py | 10 ++++- 7 files changed, 199 insertions(+), 1 deletion(-) create mode 100644 .vscode/settings.json create mode 100644 pyfive/inspect.py create mode 100644 pyfive/p5dump.py create mode 100644 tests/test_dump.py diff --git a/.vscode/settings.json b/.vscode/settings.json new file mode 100644 index 0000000..8391adb --- /dev/null +++ b/.vscode/settings.json @@ -0,0 +1,16 @@ +{ + "python.defaultInterpreterPath": "/Users/bnl28/mambaforge/envs/pyfive-25aug/bin/python", + "python.terminal.activateEnvironment":true, + "terminal.integrated.profiles.osx": { + "zsh": { + "path": "/bin/zsh", + "args": ["-l"] + } + }, + "terminal.integrated.defaultProfile.osx": "zsh", + "esbonio.server.env": { + "PATH": "/Users/bnl28/mambaforge/envs/pyfive-25aug/bin:${env:PATH}", + "PYTHONPATH": "/Users/bnl28/mambaforge/envs/pyfive-25aug/lib/python3.12/site-packages" + }, + "esbonio.server.pythonPath": "/Users/bnl28/mambaforge/envs/pyfive-25aug/bin/python" +} diff --git a/pyfive/__init__.py b/pyfive/__init__.py index 7a83096..f2c54dc 100644 --- a/pyfive/__init__.py +++ b/pyfive/__init__.py @@ -8,6 +8,7 @@ from pyfive.h5t import check_enum_dtype, check_string_dtype, check_dtype, opaque_dtype, check_opaque_dtype from pyfive.h5py import Datatype, Empty from importlib.metadata import version +from pyfive.inspect import p5ncdump __version__ = '0.5.0.dev' diff --git a/pyfive/inspect.py b/pyfive/inspect.py new file mode 100644 index 0000000..252dcee --- /dev/null +++ b/pyfive/inspect.py @@ -0,0 +1,83 @@ +from pathlib import Path +import pyfive +def clean_types(dtype): + """Convert a numpy dtype to classic ncdump type string.""" + # Strip endianness (> or <) and map to ncdump types + kind = dtype.kind + itemsize = dtype.itemsize + if kind == "f": # floating point + return f"float{itemsize*8}" + elif kind == "i": # signed integer + return f"int{itemsize*8}" + elif kind == "u": # unsigned integer + return f"uint{itemsize*8}" + elif kind == "S" or kind == "a": # fixed-length bytes + return "char" + else: + return str(dtype) # fallback + + +def dump_header(f, filename): + + print(f"File: {filename} "+'{') + dims = set() + datasets = {name: f[name] for name in f.keys() if hasattr(f[name], "shape")} + for ds in datasets.values(): + for dim in ds.dims: + for scale in dim: + dims.add((scale.name.split('/')[-1],scale.shape[0])) + if dims: + print("dimensions:") + for d in dims: + print(f' {d[0]}={d[1]};') + + print("variables:") + for name,ds in datasets.items(): + + # Variable type + dtype_str = clean_types(ds.dtype) + + # Dimensions for this variable (use dims if available) + if hasattr(ds, "dims") and len(ds.dims) > 0: + dim_names = [scale.name.split('/')[-1] for dim in ds.dims for scale in dim] + else: + # fallback: no dims + dim_names = [] + 
+ dim_str = "(" + ", ".join(dim_names) + ")" if dim_names else "" + print(f" {dtype_str} {name}{dim_str};") + + # Attributes + ommit = ['CLASS','NAME','_Netcdf4Dimid','REFERENCE_LIST','DIMENSION_LIST','_Netcdf4Coordinates'] + for attr_name, attr_val in ds.attrs.items(): + if attr_name not in ommit: + if isinstance(attr_val, bytes): + attr_val = f'"{attr_val.decode("utf-8")}"' + print(f" {name}:{attr_name} = {attr_val};") + print('}') + +def p5ncdump(file_path, special=False): + + if special: + raise NotImplementedError + + # handle posix and S3 differently + filename = getattr(file_path,'full_name', None) + if filename is None: + filename = file_path + filename = Path(filename).name + + try: + print('Now going to pyfive') + with pyfive.File(file_path) as f: + # Attach dims if not already attached + print('opened') + for name in f.keys(): + ds = f[name] #bugger this is a b-tree read + if hasattr(ds, "shape") and not hasattr(ds, "dims"): + # internally pyfive may attach dims automatically, but safe to attach here + ds.dims # access triggers dimension proxies + dump_header(f, filename) + except NotImplementedError as e: + if 'unsupported superblock' in str(e): + raise ValueError('Not an HDF5 or NC4 file!') \ No newline at end of file diff --git a/pyfive/p5dump.py b/pyfive/p5dump.py new file mode 100644 index 0000000..f6c53aa --- /dev/null +++ b/pyfive/p5dump.py @@ -0,0 +1,39 @@ +from pyfive import p5ncdump +import sys + +def main(argv=None): + """ + Provides some of the functionality of tools like ncdump and h5dump. + By default this will attempt to do something similar to ncdump. + - h will return this information + - s (not yet implemented) will provide additional information + """ + if argv is None: + argv = sys.argv[1:] # ignore script name + + match argv: + # script → error (no filename) + case []: + raise ValueError("No filename provided") + + # script -h → help + case ["-h"]: + print(main.__doc__) + return 0 + + # script filename + case [filename]: + p5ncdump(filename, special=False) + return 0 + + # script -s filename + case ["-s", filename]: + p5ncdump(filename, special=True) + return 0 + + # Anything else → error + case _: + raise ValueError(f"Invalid arguments: {argv}") + +if __name__ == '__main__': + sys.exit(main()) diff --git a/pyproject.toml b/pyproject.toml index e83b13f..ce6333d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -44,6 +44,9 @@ license = {text = "BSD License, Version 3-Clause"} name = "pyfive" requires-python = ">=3.10" +[project.scripts] +p5dump = "pyfive.p5dump:main" + [project.optional-dependencies] test = [ "pytest>6.0.0", diff --git a/tests/test_dump.py b/tests/test_dump.py new file mode 100644 index 0000000..5b4dc24 --- /dev/null +++ b/tests/test_dump.py @@ -0,0 +1,48 @@ +import pytest +from pyfive.p5dump import main +import os + + +DIRNAME = os.path.dirname(__file__) +EARLIEST_HDF5_FILE = os.path.join(DIRNAME, 'data', 'earliest.hdf5') + +# +# A standard nicely behaved netcdf4 file is tested in test_mock_s3fs +# Kill two birds with one stone there. 
+#

def test_old_hd5_with_groups(capsys):
    filename = EARLIEST_HDF5_FILE

    # No exception means success
    assert main([filename]) == 0

    captured = capsys.readouterr()

    #currently failing
    assert 'phony_dim_0' in captured.out
    assert 'dataset3(phony_dim_0)' in captured.out
    assert 'string :attr5 = "Test"' in captured.out


# Test: script -s filename (special mode)
def test_main_special_real():
    filename = "tests/data/sample.nc"
    with pytest.raises(NotImplementedError):
        assert main(["-s", filename]) == 0

# Test: -h should print help
def test_main_help_real(capsys):
    main(["-h"])
    captured = capsys.readouterr()
    assert "Provides some of the functionality" in captured.out

# Test: no filename → error
def test_main_no_args_real():
    with pytest.raises(ValueError):
        main([])

# Test: invalid flag → error
def test_main_invalid_args_real():
    with pytest.raises(ValueError):
        main(["-x", "file.nc"])
\ No newline at end of file
diff --git a/tests/test_mock_s3fs.py b/tests/test_mock_s3fs.py
index 7beffb2..6cc0a59 100644
--- a/tests/test_mock_s3fs.py
+++ b/tests/test_mock_s3fs.py
@@ -24,7 +24,7 @@ def test_s3fs_s3(s3fs_s3):
     assert mock_s3_filesystem.client_kwargs == {'endpoint_url': 'http://127.0.0.1:5555/'}
 
 
-def test_s3file_with_s3fs(s3fs_s3):
+def test_s3file_with_s3fs(s3fs_s3, capsys):
     """
     This test spoofs a complete s3fs FileSystem via s3fs_s3,
     creates a mock bucket inside it, then puts a REAL netCDF4 file in it,
@@ -57,3 +57,11 @@ def test_s3file_with_s3fs(s3fs_s3):
         pyfive_ds = pyfive.File(f)
         print(f"Dataset loaded from mock S3 with s3fs and Pyfive: ds")
         assert "q" in pyfive_ds
+
+    # test the command line main via test_s3
+    with s3.open(os.path.join("MY_BUCKET", file_name), "rb") as f:
+        pyfive.p5ncdump(f)
+
+    captured = capsys.readouterr()
+    assert ('File: issue23_A.nc' in captured.out)
+    assert ('q:cell_methods = "area: mean"' in captured.out)
\ No newline at end of file

From 9c1b6889818af55c076ecd89a4820580f4e88c61 Mon Sep 17 00:00:00 2001
From: Bryan Lawrence
Date: Thu, 23 Oct 2025 14:21:18 +0100
Subject: [PATCH 05/16] Working implementation of lazy access to variables
 (#135) and partial implementation of p5dump functionality (#134). Unit tests
 are failing due to a desire to get closer to (but not exactly match) what
 ncdump does.

---
 pyfive/h5d.py                     | 49 ++++++++++++++++++++++---------
 pyfive/high_level.py              | 41 ++++++++++++++++++++++++--
 pyfive/inspect.py                 |  6 +++-
 tests/test_chunk_index_options.py | 28 ++++++++++++++++++
 tests/test_mock_s3fs.py           |  3 +-
 5 files changed, 109 insertions(+), 18 deletions(-)
 create mode 100644 tests/test_chunk_index_options.py

diff --git a/pyfive/h5d.py b/pyfive/h5d.py
index fb28a19..a2168e3 100644
--- a/pyfive/h5d.py
+++ b/pyfive/h5d.py
@@ -12,6 +12,7 @@
 from importlib.metadata import version
 
 StoreInfo = namedtuple('StoreInfo',"chunk_offset filter_mask byte_offset size")
+ChunkIndex = namedtuple('ChunkIndex',"chunk_address chunk_dims")
 
 class DatasetID:
     """
@@ -27,10 +28,15 @@ class DatasetID:
     from the parent file access as both share underlying C-structures.*
     """
 
-    def __init__(self, dataobject, pseudo_chunking_size_MB=4):
+    def __init__(self, dataobject, noindex=False, pseudo_chunking_size_MB=4):
         """
         Instantiated with the ``pyfive`` dataset ``dataobject``, we copy
         and cache everything we want so that the only file operations are
         now data accesses.
+
+        noindex provides a method for controlling how lazy the data load
+        actually is. This version supports values of False (normal
+        behaviour: the index is read when the DatasetID is first
+        instantiated) or True (the index is only read when the data
+        is accessed).
 
         if ``pseudo_chunking_size_MB`` is set to a value greater than zero, and
         if the storage is not local posix (and hence ``np.mmap`` is not available) then
@@ -102,7 +108,9 @@ def __init__(self, dataobject, pseudo_chunking_size_MB=4):
 
         self._meta = DatasetMeta(dataobject)
 
-        self._index = None
+        self._index = None
+        self.__index_built = False
+        self._index_params = None
 
         # throws a flake8 wobbly for Python<3.10; match is Py3.10+ syntax
         match self.layout_class: # noqa
             case 0: #compact storage
             case 1: # contiguous storage
                 self.data_offset, = struct.unpack_from('<Q', ...)
[the remaining h5d.py hunks and the start of the high_level.py diff were lost when
this section was extracted; the text resumes inside __getitem__]
-        """ x.__getitem__(y) <==> x[y] """
+        """ x.__getitem__(y) <==> x[y].
+        """
+        return self.__getitem_lazy_control(y, noindex=False)
+
+
+    def get_lazy_view(self, y):
+        """
+        This instantiates the object y, and if it is a
+        chunked dataset, does so without reading the b-tree
+        index. This is useful for inspecting a variable
+        that you are not expecting to access. If you know you
+        want to access the data, and in particular, if you are
+        going to hand the data to Dask or something else, you
+        almost certainly want to read the index now, so
+        just do x[y] rather than x.get_lazy_view(y).
+
+        This is a ``pyfive`` extension to the standard h5py API.
+        """
+
+        return self.__getitem_lazy_control(y, noindex=True)
+
+
+    def __getitem_lazy_control(self, y, noindex):
+        """
+        This is the routine which actually does the get item,
+        but does it in such a way that we control how much laziness
+        is possible where we have chunked variables with b-trees.
+
+        We want to return y, but if y is a chunked dataset we
+        normally return it with a cached b-tree (noindex=False).
+        If noindex is True, we do not read the b-tree, and that
+        will be done when data is first read - which is fine
+        in a single-threaded environment, but in a parallel
+        environment you only want to read the index once
+        (so use noindex=False, which you get via the
+        normal getitem interface - x[y]).
+        """
+
         if isinstance(y, Reference):
             return self._dereference(y)
@@ -92,7 +129,7 @@ def __getitem__(self, y):
         if dataobjs.is_dataset:
             if additional_obj != '.':
                 raise KeyError('%s is a dataset, not a group' % (obj_name))
-            return Dataset(obj_name, DatasetID(dataobjs), self)
+            return Dataset(obj_name, DatasetID(dataobjs, noindex=noindex), self)
 
         try:
             # if true, this may well raise a NotImplementedError, if so, we need
diff --git a/pyfive/inspect.py b/pyfive/inspect.py
index 252dcee..59c7576 100644
--- a/pyfive/inspect.py
+++ b/pyfive/inspect.py
@@ -21,7 +21,11 @@ def dump_header(f, filename):
 
     print(f"File: {filename} "+'{')
     dims = set()
-    datasets = {name: f[name] for name in f.keys() if hasattr(f[name], "shape")}
+    datasets = {}
+    for name in f:
+        item = f.get_lazy_view(name)
+        if hasattr(item,"shape"):
+            datasets[name]=item
     for ds in datasets.values():
         for dim in ds.dims:
             for scale in dim:
diff --git a/tests/test_chunk_index_options.py b/tests/test_chunk_index_options.py
new file mode 100644
index 0000000..0384f8a
--- /dev/null
+++ b/tests/test_chunk_index_options.py
@@ -0,0 +1,28 @@
+""" Test pyfive's ability to read multidimensional datasets. """
""" +import os + +import numpy as np +from numpy.testing import assert_array_equal + +import pyfive + +DIRNAME = os.path.dirname(__file__) +DATASET_CHUNKED_HDF5_FILE = os.path.join(DIRNAME, "data", 'chunked.hdf5') + + +def test_lazy_index(): + + with pyfive.File(DATASET_CHUNKED_HDF5_FILE) as hfile: + + # instantiate variable + dset1 = hfile.get_lazy_view('dataset1') + + # should be able to see attributes but not have an index yet + assert dset1.attrs['attr1'] == 130 + + # test we have no index yet + assert dset1.id._DatasetID__index_built==False + + # this should force an index build + assert_array_equal(dset1[:], np.arange(21*16).reshape((21, 16))) + assert dset1.chunks == (2, 2) \ No newline at end of file diff --git a/tests/test_mock_s3fs.py b/tests/test_mock_s3fs.py index 6cc0a59..0896a4e 100644 --- a/tests/test_mock_s3fs.py +++ b/tests/test_mock_s3fs.py @@ -64,4 +64,5 @@ def test_s3file_with_s3fs(s3fs_s3, capsys): captured = capsys.readouterr() assert ('File: issue23_A.nc' in captured.out) - assert ('q:cell_methods = "area: mean"' in captured.out) \ No newline at end of file + assert ('q:cell_methods = "area: mean"' in captured.out) + assert (':Conventions = "CF-1.12"' in captured.out) \ No newline at end of file From 2c3a3e3657c72287f96d85262f0524466217fd94 Mon Sep 17 00:00:00 2001 From: Bryan Lawrence Date: Fri, 24 Oct 2025 16:09:55 +0100 Subject: [PATCH 06/16] allow visititems to be lazy --- pyfive/high_level.py | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/pyfive/high_level.py b/pyfive/high_level.py index 2a4ad4b..6c97b96 100644 --- a/pyfive/high_level.py +++ b/pyfive/high_level.py @@ -162,7 +162,7 @@ def visit(self, func): """ return self.visititems(lambda name, obj: func(name)) - def visititems(self, func): + def visititems(self, func, noindex=False): """ Recursively visit all objects in this group and subgroups. @@ -173,11 +173,25 @@ def visititems(self, func): Returning None continues iteration, return anything else stops and return that value from the visit method. + Use of the optional noindex=True will ensure that + all operations are not only lazy wrt data, but lazy + wrt to any chunked data indices. + """ root_name_length = len(self.name) if not self.name.endswith('/'): root_name_length += 1 - queue = deque(self.values()) + + # Use either normal access or lazy access: + if noindex: + # Avoid loading dataset indices + get_obj = self.get_lazy_view + else: + get_obj = self.__getitem__ + + # Initialize queue using the correct getter + queue = deque(get_obj(k) for k in self._links.keys()) + while queue: obj = queue.popleft() name = obj.name[root_name_length:] From 7827480f90ea86f41cea2d1f03b5f2f983aa957d Mon Sep 17 00:00:00 2001 From: Bryan Lawrence Date: Fri, 24 Oct 2025 16:10:20 +0100 Subject: [PATCH 07/16] p5dump works for the test cases --- pyfive/inspect.py | 170 ++++++++++++++++++++++++++++++++++++---------- 1 file changed, 133 insertions(+), 37 deletions(-) diff --git a/pyfive/inspect.py b/pyfive/inspect.py index 59c7576..f01bd94 100644 --- a/pyfive/inspect.py +++ b/pyfive/inspect.py @@ -1,5 +1,7 @@ from pathlib import Path -import pyfive + +from pyfive import Dataset, Group, File + def clean_types(dtype): """Convert a numpy dtype to classic ncdump type string.""" # Strip endianness (> or <) and map to ncdump types @@ -17,48 +19,141 @@ def clean_types(dtype): return str(dtype) # fallback -def dump_header(f, filename): +def collect_dimensions_from_root(root): + """ + Collect true netCDF-style dimensions from the root group only. 
From 7827480f90a86f41cea2d1f03b5f2f983aa957d Mon Sep 17 00:00:00 2001
From: Bryan Lawrence
Date: Fri, 24 Oct 2025 16:10:20 +0100
Subject: [PATCH 07/16] p5dump works for the test cases

---
 pyfive/inspect.py | 170 ++++++++++++++++++++++++++++++++++++----------
 1 file changed, 133 insertions(+), 37 deletions(-)

diff --git a/pyfive/inspect.py b/pyfive/inspect.py
index 59c7576..f01bd94 100644
--- a/pyfive/inspect.py
+++ b/pyfive/inspect.py
@@ -1,5 +1,7 @@
 from pathlib import Path
-import pyfive
+
+from pyfive import Dataset, Group, File
+
 def clean_types(dtype):
     """Convert a numpy dtype to classic ncdump type string."""
     # Strip endianness (> or <) and map to ncdump types
@@ -17,48 +19,141 @@ def clean_types(dtype):
         return str(dtype)  # fallback
 
 
-def dump_header(f, filename):
+def collect_dimensions_from_root(root):
+    """
+    Collect true netCDF-style dimensions from the root group only.
+
+    Returns
+    -------
+    dims : dict
+        Maps dimension name (str) -> size (int)
+    """
+    dims = {}
+
+    for name in root:
+
+        obj = root.get_lazy_view(name)
+        # Must be a dataset to be a dimension scale
+        if not isinstance(obj,Dataset):
+            continue
+
+        # Must have CLASS="DIMENSION_SCALE" to qualify
+        if str(obj.attrs.get("CLASS")) == "b'DIMENSION_SCALE'":
+            # NetCDF stores the real dimension name under NAME
+            dim_name = obj.attrs.get("NAME").decode()
+            if dim_name.startswith('This is a netCDF dimension but not a'):
+                dim_name = name
+            # Use the first axis of its shape as the dimension size
+            size = obj.shape[0] if hasattr(obj, "shape") and obj.shape else None
+
+            # Only add if size makes sense
+            if size is not None:
+                dims[dim_name] = size
+
+    return dims
+
+def gather_dimensions(obj, alldims, phonys, real_dimensions):
+    """
+    Gather dimensions from dimension scales if present and, if not,
+    infer phony dimensions (to behave like netCDF reporting of an HDF5
+    file), so the dump is useful even if we are an HDF5-only application.
+    Monkey patch these dims alongside the existing dimension manager.
+    """
+
+    if not hasattr(obj,'__inspected_dims'):
+        obj.__inspected_dims=[]
+
+    oname = obj.name.split('/')[-1]
+
+    for axis, size in enumerate(obj.shape):
+
+        if obj.dims[axis]: # real scale exists
+            edim = (obj.dims[axis][0].name.split('/')[-1], size)
+
+        elif size in real_dimensions.values():
+            dim_name = next(name for name, sz in real_dimensions.items() if sz == size)
+            edim = (dim_name, size)
+        else:
+            # make or reuse a phony dimension name
+            if size not in phonys:
+                phonys[size] = f"phony_dim_{len(phonys)}"
+            pname = phonys[size]
+            edim = (pname,size)
+
+        obj.__inspected_dims.append(edim)
+        alldims.add(edim)
+
+    return obj, alldims, phonys
+
+
+def dump_header(obj, indent, real_dimensions):
+    """ Pretty print a group within an HDF5 file (including the root group) """
+
+    def printattr(attrs, ommit=[]):
+        """ Pretty print a set of attributes """
+        for k,v in attrs.items():
+            if k not in ommit:
+                if isinstance(v, bytes):
+                    v = f'"{v.decode("utf-8")}"'
+                print(f"{indent}{dindent}{dindent}{name}:{k} = {v} ;")
 
     dims = set()
     datasets = {}
+    groups = {}
+    phonys = {}
+
+    for name in obj:
+        item = obj.get_lazy_view(name)
+        if isinstance(item, Dataset):
+            if str(item.attrs.get('NAME','None')).startswith('This is a netCDF dimension but not a'):
+                continue
+            datasets[name]=item
+        elif isinstance(item, Group):
+            groups[name]=item
+
 
     for ds in datasets.values():
-        for dim in ds.dims:
-            for scale in dim:
-                dims.add((scale.name.split('/')[-1],scale.shape[0]))
+        ds, dims, phonys = gather_dimensions(ds, dims, phonys, real_dimensions)
     if dims:
-        print("dimensions:")
+        print(f"{indent}dimensions:")
+        dindent = '    '
         for d in dims:
-            print(f'    {d[0]}={d[1]};')
+            print(f'{indent}{dindent}{d[0]} = {d[1]};')
 
-    print("variables:")
+    print(f"{indent}variables:")
     for name,ds in datasets.items():
 
         # Variable type
         dtype_str = clean_types(ds.dtype)
 
         # Dimensions for this variable (use dims if available)
-        if hasattr(ds, "dims") and len(ds.dims) > 0:
-            dim_names = [scale.name.split('/')[-1] for dim in ds.dims for scale in dim]
-        else:
-            # fallback: no dims
-            dim_names = []
-
-        dim_str = "(" + ", ".join(dim_names) + ")" if dim_names else ""
-        print(f"    {dtype_str} {name}{dim_str};")
+        if hasattr(ds,'__inspected_dims'):
+            dim_names = [d[0] for d in ds.__inspected_dims]
+            dim_str = "(" + ", ".join(dim_names) + ")" if dim_names else ""
dim_names else "" + print(f"{indent}{dindent}{dtype_str} {name}{dim_str} ;") # Attributes - ommit = ['CLASS','NAME','_Netcdf4Dimid','REFERENCE_LIST','DIMENSION_LIST','_Netcdf4Coordinates'] - for attr_name, attr_val in ds.attrs.items(): - if attr_name not in ommit: - if isinstance(attr_val, bytes): - attr_val = f'"{attr_val.decode("utf-8")}"' - print(f" {name}:{attr_name} = {attr_val};") - print('}') + ommit = ['CLASS','NAME','_Netcdf4Dimid', + 'REFERENCE_LIST','DIMENSION_LIST','DIMENSION_LABELS','_Netcdf4Coordinates'] + + printattr(ds.attrs, ommit) + + if isinstance(obj, File): + hstr='// global ' + elif isinstance(obj, Group): + hstr=f'{indent}// group ' + if obj.attrs: + print(hstr+'attributes:') + printattr(obj.attrs, ['_NCProperties']) + + if groups: + for g,o in groups.items(): + print(f'{indent}group: {g} '+'{') + gindent = indent+' ' + dump_header(o,gindent,real_dimensions) + print(gindent+'}'+f' // group {g}') + + def p5ncdump(file_path, special=False): @@ -72,16 +167,17 @@ def p5ncdump(file_path, special=False): filename = Path(filename).name try: - print('Now going to pyfive') - with pyfive.File(file_path) as f: - # Attach dims if not already attached - print('opened') - for name in f.keys(): - ds = f[name] #bugger this is a b-tree read - if hasattr(ds, "shape") and not hasattr(ds, "dims"): - # internally pyfive may attach dims automatically, but safe to attach here - ds.dims # access triggers dimension proxies - dump_header(f, filename) + with File(file_path) as f: + + # we assume all the netcdf 4 dimnnsions, if they exist, are in the root group + real_dimensions = collect_dimensions_from_root(f) + + # ok, go for it + print(f"File: {filename} "+'{') + indent = '' + dump_header(f, indent, real_dimensions) + print('}') + except NotImplementedError as e: if 'unsupported superblock' in str(e): raise ValueError('Not an HDF5 or NC4 file!') \ No newline at end of file From 481ca47a351f555839905f4e47198ac01e997f81 Mon Sep 17 00:00:00 2001 From: Bryan Lawrence Date: Fri, 24 Oct 2025 16:15:21 +0100 Subject: [PATCH 08/16] Fixed string handling in groups --- pyfive/inspect.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pyfive/inspect.py b/pyfive/inspect.py index f01bd94..cbae9d9 100644 --- a/pyfive/inspect.py +++ b/pyfive/inspect.py @@ -95,6 +95,8 @@ def printattr(attrs, ommit=[]): if k not in ommit: if isinstance(v, bytes): v = f'"{v.decode("utf-8")}"' + elif isinstance(v,str): + v = f'"{v}"' print(f"{indent}{dindent}{dindent}{name}:{k} = {v} ;") dims = set() From 5ab84bfdab89da6e4766dbb97c06f46d3569245d Mon Sep 17 00:00:00 2001 From: Bryan Lawrence Date: Fri, 24 Oct 2025 16:27:08 +0100 Subject: [PATCH 09/16] Better testing --- tests/test_chunk_index_options.py | 29 ++++++++++++++++++++++++++++- tests/test_dump.py | 4 ++-- 2 files changed, 30 insertions(+), 3 deletions(-) diff --git a/tests/test_chunk_index_options.py b/tests/test_chunk_index_options.py index 0384f8a..1f6f8e6 100644 --- a/tests/test_chunk_index_options.py +++ b/tests/test_chunk_index_options.py @@ -25,4 +25,31 @@ def test_lazy_index(): # this should force an index build assert_array_equal(dset1[:], np.arange(21*16).reshape((21, 16))) - assert dset1.chunks == (2, 2) \ No newline at end of file + assert dset1.chunks == (2, 2) + + +def test_lazy_visititems(): + + def simpler_check(x,y): + """ Expect this to be visited and instantiated without an index """ + print(x,y.name) + assert y.attrs['attr1'] == 130 + assert y.id._DatasetID__index_built==False + + def simplest_check(x,y): + """ Expect this to be visited and 
+        print(x,y.name)
+        assert y.attrs['attr1'] == 130
+        assert y.id._DatasetID__index_built==True
+
+
+    with pyfive.File(DATASET_CHUNKED_HDF5_FILE) as hfile:
+
+        assert hfile.visititems(simpler_check,noindex=True) is None
+        assert hfile.visititems(simplest_check) is None
+
+
+
+
+
diff --git a/tests/test_dump.py b/tests/test_dump.py
index 5b4dc24..749283a 100644
--- a/tests/test_dump.py
+++ b/tests/test_dump.py
@@ -21,8 +21,8 @@ def test_old_hd5_with_groups(capsys):
 
     #currently failing
     assert 'phony_dim_0' in captured.out
-    assert 'dataset3(phony_dim_0)' in captured.out
-    assert 'string :attr5 = "Test"' in captured.out
+    assert 'dataset3(phony_dim' in captured.out
+    assert 'attr5 = "Test"' in captured.out
 

From a917d9ed79d54e14dae14b55840f39756423db464 Mon Sep 17 00:00:00 2001
From: Bryan Lawrence
Date: Sun, 26 Oct 2025 18:49:33 +0000
Subject: [PATCH 10/16] Support for chunk information in p5dump via -s. (Fixed
 btree_range as well I think)

---
 pyfive/btree.py   |  2 ++
 pyfive/h5d.py     | 19 ++++++++++++++-----
 pyfive/inspect.py | 19 ++++++++++++++-----
 3 files changed, 30 insertions(+), 10 deletions(-)

diff --git a/pyfive/btree.py b/pyfive/btree.py
index 7ce119b..a48d278 100644
--- a/pyfive/btree.py
+++ b/pyfive/btree.py
@@ -22,6 +22,7 @@ def __init__(self, fh, offset):
         self.offset = offset
         self.depth = None
         self.all_nodes = {}
+        self.last_offset = offset
 
         self._read_root_node()
         self._read_children()
@@ -53,6 +54,7 @@ def _read_node(self, offset, node_level):
         node = self._read_node_header(offset, node_level)
         node['keys'] = []
         node['addresses'] = []
+        self.last_offset=max(offset,self.last_offset)
        return node
 
     def _read_node_header(self, offset):
diff --git a/pyfive/h5d.py b/pyfive/h5d.py
index a2168e3..29a3d65 100644
--- a/pyfive/h5d.py
+++ b/pyfive/h5d.py
@@ -142,9 +142,11 @@ def get_chunk_info(self, index):
         """
         Retrieve storage information about a chunk specified by its index.
""" - if not self._index: - return None + if self._index_params is None: + raise ValueError('No chunk detail available for HDF layout class {self.layout}') else: + if not self.__index_built: + self._build_index() return self._index[self._nthindex[index]] def get_chunk_info_by_coord(self, coordinate_index): @@ -161,6 +163,11 @@ def get_num_chunks(self): """ Return total number of chunks in dataset """ + if self._index_params is None: + raise ValueError('No chunk detail available for HDF layout class {self.layout}') + else: + if not self.__index_built: + self._build_index() return len(self._index) def read_direct_chunk(self, chunk_position, **kwargs): @@ -354,8 +361,7 @@ def _build_index(self): self._nthindex = [] for node in chunk_btree.all_nodes[0]: - self._btree_start=node['addresses'][0] - self._btree_end=node['addresses'][0] + for node_key, addr in zip(node['keys'], node['addresses']): start = node_key['chunk_offset'][:-1] key = start @@ -363,7 +369,10 @@ def _build_index(self): filter_mask = node_key['filter_mask'] self._nthindex.append(key) self._index[key] = StoreInfo(key, filter_mask, addr, size) - self._btree_end=max(addr,self._btree_end) + + + self._btree_start=chunk_btree.offset + self._btree_end=chunk_btree.last_offset self.__index_built=True diff --git a/pyfive/inspect.py b/pyfive/inspect.py index cbae9d9..43f69ea 100644 --- a/pyfive/inspect.py +++ b/pyfive/inspect.py @@ -86,7 +86,7 @@ def gather_dimensions(obj, alldims, phonys, real_dimensions): return obj, alldims, phonys -def dump_header(obj, indent, real_dimensions): +def dump_header(obj, indent, real_dimensions, special): """ Pretty print a group within an HDF5 file (including the root group) """ def printattr(attrs, ommit=[]): @@ -139,6 +139,18 @@ def printattr(attrs, ommit=[]): 'REFERENCE_LIST','DIMENSION_LIST','DIMENSION_LABELS','_Netcdf4Coordinates'] printattr(ds.attrs, ommit) + + if special: + extras = {} + if ds.id._index_params: + extras['_n_chunks'] = ds.id.get_num_chunks() + extras['_chunk_shape'] = ds.id.chunks + extras['_btree_range'] = ds.id.btree_range + extras['_first_chunk'] = ds.id.get_chunk_info(0).byte_offset + if ds.compression: + extras['_compression'] = ds.compression+f'({ds.compression_opts})' + printattr(extras,[]) + if isinstance(obj, File): hstr='// global ' @@ -159,9 +171,6 @@ def printattr(attrs, ommit=[]): def p5ncdump(file_path, special=False): - if special: - raise NotImplementedError - # handle posix and S3 differently filename = getattr(file_path,'full_name', None) if filename is None: @@ -177,7 +186,7 @@ def p5ncdump(file_path, special=False): # ok, go for it print(f"File: {filename} "+'{') indent = '' - dump_header(f, indent, real_dimensions) + dump_header(f, indent, real_dimensions, special) print('}') except NotImplementedError as e: From 173d9d170ae9e0a94e272681f84b3e0b53c3fafc Mon Sep 17 00:00:00 2001 From: Bryan Lawrence Date: Mon, 27 Oct 2025 09:09:03 +0000 Subject: [PATCH 11/16] p5dump -s includes storage type (from layout_class) --- pyfive/inspect.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/pyfive/inspect.py b/pyfive/inspect.py index 43f69ea..51bc586 100644 --- a/pyfive/inspect.py +++ b/pyfive/inspect.py @@ -141,14 +141,14 @@ def printattr(attrs, ommit=[]): printattr(ds.attrs, ommit) if special: - extras = {} - if ds.id._index_params: + extras = {'_Storage':{0:'Compact',1:'Contiguous',2:'Chunked'}[ds.id.layout_class]} + if ds.id.layout_class==2: extras['_n_chunks'] = ds.id.get_num_chunks() extras['_chunk_shape'] = ds.id.chunks extras['_btree_range'] 
                 extras['_first_chunk'] = ds.id.get_chunk_info(0).byte_offset
-            if ds.compression:
-                extras['_compression'] = ds.compression+f'({ds.compression_opts})'
+                if ds.compression:
+                    extras['_compression'] = ds.compression+f'({ds.compression_opts})'
             printattr(extras,[])

From dae9eb1314470fae0bbf06b444795e49e1546262 Mon Sep 17 00:00:00 2001
From: Bryan Lawrence
Date: Mon, 27 Oct 2025 09:42:17 +0000
Subject: [PATCH 12/16] Edge case detection and bug fix

---
 pyfive/btree.py   | 1 +
 pyfive/h5d.py     | 2 ++
 pyfive/inspect.py | 7 ++++---
 3 files changed, 7 insertions(+), 3 deletions(-)

diff --git a/pyfive/btree.py b/pyfive/btree.py
index a48d278..15e8732 100644
--- a/pyfive/btree.py
+++ b/pyfive/btree.py
@@ -151,6 +151,7 @@ def _read_node(self, offset, node_level):
             addresses.append(chunk_address)
         node['keys'] = keys
         node['addresses'] = addresses
+        self.last_offset=max(offset,self.last_offset)
         return node
 
 
diff --git a/pyfive/h5d.py b/pyfive/h5d.py
index 29a3d65..c815cb3 100644
--- a/pyfive/h5d.py
+++ b/pyfive/h5d.py
@@ -343,6 +343,8 @@ def _build_index(self):
         # look out for an empty dataset, which will have no btree
         if np.prod(self.shape) == 0 or self._index_params.chunk_address == UNDEFINED_ADDRESS:
             self._index = {}
+            #FIXME: There are other edge cases for self._index = {} to handle
+            self._btree_end, self._btree_start = None, None
             return
 
         logging.info(f'Building chunk index in pyfive {version("pyfive")}')
diff --git a/pyfive/inspect.py b/pyfive/inspect.py
index 51bc586..ce3ce9e 100644
--- a/pyfive/inspect.py
+++ b/pyfive/inspect.py
@@ -144,9 +144,10 @@ def printattr(attrs, ommit=[]):
             extras = {'_Storage':{0:'Compact',1:'Contiguous',2:'Chunked'}[ds.id.layout_class]}
             if ds.id.layout_class==2:
                 extras['_n_chunks'] = ds.id.get_num_chunks()
-                extras['_chunk_shape'] = ds.id.chunks
-                extras['_btree_range'] = ds.id.btree_range
-                extras['_first_chunk'] = ds.id.get_chunk_info(0).byte_offset
+                if extras['_n_chunks'] != 0:
+                    extras['_chunk_shape'] = ds.id.chunks
+                    extras['_btree_range'] = ds.id.btree_range
+                    extras['_first_chunk'] = ds.id.get_chunk_info(0).byte_offset
                 if ds.compression:
                     extras['_compression'] = ds.compression+f'({ds.compression_opts})'
             printattr(extras,[])

From aa4ca30e5a5653680453753a18a2ee43fbb9f270 Mon Sep 17 00:00:00 2001
From: Bryan Lawrence
Date: Mon, 27 Oct 2025 09:59:51 +0000
Subject: [PATCH 13/16] Handling broken pipes more gracefully

---
 pyfive/inspect.py | 26 ++++++++++++++++----------
 pyfive/p5dump.py  | 32 +++++++++++++++++++++-----------
 2 files changed, 37 insertions(+), 21 deletions(-)

diff --git a/pyfive/inspect.py b/pyfive/inspect.py
index ce3ce9e..20ef409 100644
--- a/pyfive/inspect.py
+++ b/pyfive/inspect.py
@@ -2,6 +2,12 @@
 
 from pyfive import Dataset, Group, File
 
+def safe_print(*args, **kwargs):
+    try:
+        print(*args, **kwargs)
+    except BrokenPipeError:
+        raise SystemExit(1)
+
 def clean_types(dtype):
     """Convert a numpy dtype to classic ncdump type string."""
     # Strip endianness (> or <) and map to ncdump types
@@ -97,7 +103,7 @@ def printattr(attrs, ommit=[]):
                     v = f'"{v.decode("utf-8")}"'
                 elif isinstance(v,str):
                     v = f'"{v}"'
-                print(f"{indent}{dindent}{dindent}{name}:{k} = {v} ;")
+                safe_print(f"{indent}{dindent}{dindent}{name}:{k} = {v} ;")
 
     dims = set()
     datasets = {}
@@ -117,10 +123,10 @@ def printattr(attrs, ommit=[]):
     for ds in datasets.values():
         ds, dims, phonys = gather_dimensions(ds, dims, phonys, real_dimensions)
     if dims:
-        print(f"{indent}dimensions:")
+        safe_print(f"{indent}dimensions:")
         dindent = '    '
         for d in dims:
-            print(f'{indent}{dindent}{d[0]} = {d[1]};')
+            safe_print(f'{indent}{dindent}{d[0]} = {d[1]};')
 
     print(f"{indent}variables:")
     for name,ds in datasets.items():
@@ -131,7 +137,7 @@ def printattr(attrs, ommit=[]):
         if hasattr(ds,'__inspected_dims'):
             dim_names = [d[0] for d in ds.__inspected_dims]
             dim_str = "(" + ", ".join(dim_names) + ")" if dim_names else ""
-            print(f"{indent}{dindent}{dtype_str} {name}{dim_str} ;")
+            safe_print(f"{indent}{dindent}{dtype_str} {name}{dim_str} ;")
 
         # Attributes
         ommit = ['CLASS','NAME','_Netcdf4Dimid',
@@ -160,15 +166,15 @@ def printattr(attrs, ommit=[]):
     elif isinstance(obj, Group):
         hstr=f'{indent}// group '
     if obj.attrs:
-        print(hstr+'attributes:')
-        printattr(obj.attrs, ['_NCProperties'])
+        safe_print(hstr+'attributes:')
+        safe_printattr(obj.attrs, ['_NCProperties'])
 
     if groups:
         for g,o in groups.items():
-            print(f'{indent}group: {g} '+'{')
+            safe_print(f'{indent}group: {g} '+'{')
             gindent = indent+'    '
             dump_header(o,gindent,real_dimensions)
-            print(gindent+'}'+f' // group {g}')
+            safe_print(gindent+'}'+f' // group {g}')
@@ -186,10 +192,10 @@ def p5ncdump(file_path, special=False):
         real_dimensions = collect_dimensions_from_root(f)
 
         # ok, go for it
-        print(f"File: {filename} "+'{')
+        safe_print(f"File: {filename} "+'{')
         indent = ''
         dump_header(f, indent, real_dimensions, special)
-        print('}')
+        safe_print('}')
 
     except NotImplementedError as e:
         if 'unsupported superblock' in str(e):
             raise ValueError('Not an HDF5 or NC4 file!')
diff --git a/pyfive/p5dump.py b/pyfive/p5dump.py
index f6c53aa..52f9364 100644
--- a/pyfive/p5dump.py
+++ b/pyfive/p5dump.py
@@ -1,39 +1,49 @@
 from pyfive import p5ncdump
 import sys
+import signal
 
 def main(argv=None):
     """
     Provides some of the functionality of tools like ncdump and h5dump.
     By default this will attempt to do something similar to ncdump.
     - h will return this information
-    - s (not yet implemented) will provide additional information
+    - s will provide additional information
     """
     if argv is None:
         argv = sys.argv[1:]  # ignore script name
 
     match argv:
-        # script → error (no filename)
         case []:
             raise ValueError("No filename provided")
-
-        # script -h → help
         case ["-h"]:
             print(main.__doc__)
             return 0
-
-        # script filename
         case [filename]:
             p5ncdump(filename, special=False)
             return 0
-
-        # script -s filename
         case ["-s", filename]:
             p5ncdump(filename, special=True)
             return 0
-
-        # Anything else → error
         case _:
             raise ValueError(f"Invalid arguments: {argv}")
 
 if __name__ == '__main__':
-    sys.exit(main())
+    # Set SIGPIPE to default behaviour on Unix (ignored safely on Windows)
+    try:
+        signal.signal(signal.SIGPIPE, signal.SIG_DFL)
+    except (AttributeError, ValueError):
+        pass
+
+    try:
+        sys.exit(main())
+    except BrokenPipeError:
+        # Happens if pipe is closed early (e.g. `| head` or user quits `more`)
+        try:
+            sys.stderr.flush()
+        except Exception:
+            pass
+        sys.exit(0)
+    except Exception as e:
+        # Any other error: no traceback, just a clean message
+        print(f"Error: {e}", file=sys.stderr)
+        sys.exit(1)
\ No newline at end of file
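As a rough illustration of the command line behaviour this patch is aiming for
(the file name is arbitrary and the output is elided):

    $ p5dump mydata.nc           # ncdump-like header dump
    $ p5dump -s mydata.nc        # adds _Storage, _n_chunks, _btree_range etc.
    $ p5dump mydata.nc | head    # a closed pipe now exits quietly, no traceback
    $ p5dump -h                  # prints the docstring shown above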
From a84fea66d0f502f9a290adce7c1d7e898219482b Mon Sep 17 00:00:00 2001
From: Bryan Lawrence
Date: Mon, 27 Oct 2025 10:32:14 +0000
Subject: [PATCH 14/16] Why don't I run my tests before committing?

---
 pyfive/inspect.py  | 4 ++--
 tests/test_dump.py | 5 ++---
 2 files changed, 4 insertions(+), 5 deletions(-)

diff --git a/pyfive/inspect.py b/pyfive/inspect.py
index 20ef409..04c59f5 100644
--- a/pyfive/inspect.py
+++ b/pyfive/inspect.py
@@ -165,13 +165,13 @@ def printattr(attrs, ommit=[]):
         hstr=f'{indent}// group '
     if obj.attrs:
         safe_print(hstr+'attributes:')
-        safe_printattr(obj.attrs, ['_NCProperties'])
+        printattr(obj.attrs, ['_NCProperties'])
 
     if groups:
         for g,o in groups.items():
             safe_print(f'{indent}group: {g} '+'{')
             gindent = indent+'    '
-            dump_header(o,gindent,real_dimensions)
+            dump_header(o,gindent,real_dimensions, special=special)
             safe_print(gindent+'}'+f' // group {g}')
 
 
diff --git a/tests/test_dump.py b/tests/test_dump.py
index 749283a..771ad46 100644
--- a/tests/test_dump.py
+++ b/tests/test_dump.py
@@ -27,9 +27,8 @@ def test_old_hd5_with_groups(capsys):
 
 # Test: script -s filename (special mode)
 def test_main_special_real():
-    filename = "tests/data/sample.nc"
-    with pytest.raises(NotImplementedError):
-        assert main(["-s", filename]) == 0
+    filename = EARLIEST_HDF5_FILE
+    assert main(["-s", filename]) == 0
 
 # Test: -h should print help
 def test_main_help_real(capsys):

From b02083c41b135e551bc772115c54a6db7650314a Mon Sep 17 00:00:00 2001
From: Bryan Lawrence
Date: Mon, 27 Oct 2025 13:33:26 +0000
Subject: [PATCH 15/16] Cleaning up the DatasetID interface error handling for
 chunk queries on unchunked data and some tests to keep V happy.

---
 pyfive/h5d.py                     | 63 +++++++++++++++++--------------
 tests/test_chunk_index_options.py | 57 +++++++++++++++++++++++++++-
 2 files changed, 90 insertions(+), 30 deletions(-)

diff --git a/pyfive/h5d.py b/pyfive/h5d.py
index c815cb3..5c3845f 100644
--- a/pyfive/h5d.py
+++ b/pyfive/h5d.py
@@ -138,44 +138,52 @@ def __eq__(self, other):
         """
         return self._unique == other._unique
 
+    def __chunk_init_check(self):
+        """
+        Used by all the chunk methods to check that this dataset is
+        chunked and, if so, whether the index is present, building it
+        if not. Otherwise handle errors etc.
+        """
+        if self.layout_class != 2:
+            raise TypeError('Dataset is not chunked')
+        return not self.index == {}
+
+
     def get_chunk_info(self, index):
         """
         Retrieve storage information about a chunk specified by its index.
         """
-        if self._index_params is None:
-            raise ValueError(f'No chunk detail available for HDF layout class {self.layout_class}')
-        else:
-            if not self.__index_built:
-                self._build_index()
+        if self.__chunk_init_check():
             return self._index[self._nthindex[index]]
+        else:
+            return None
 
     def get_chunk_info_by_coord(self, coordinate_index):
         """
         Retrieve information about a chunk specified by the array address
         of the chunk’s first element in each dimension.
         """
-        if not self._index:
-            return None
-        else:
+        if self.__chunk_init_check():
             return self._index[coordinate_index]
+        else:
+            return None
 
     def get_num_chunks(self):
         """
         Return total number of chunks in dataset
         """
-        if self._index_params is None:
-            raise ValueError(f'No chunk detail available for HDF layout class {self.layout_class}')
-        else:
-            if not self.__index_built:
-                self._build_index()
-        return len(self._index)
+        if self.__chunk_init_check():
+            return len(self._index)
+        else:
+            return 0
 
     def read_direct_chunk(self, chunk_position, **kwargs):
         """
         Returns a tuple containing the filter_mask and the raw data storing
         this chunk as bytes. Additional arguments supported by ``h5py``
         are not supported here.
""" - if not self.index: + if not self.__chunk_init_check(): return None if chunk_position not in self._index: raise OSError("Chunk coordinates must lie on chunk boundaries") @@ -229,9 +237,9 @@ def iter_chunks(self, args): intersection of the given chunk with the selection area. This can be used to read data in that chunk. """ - if self.chunks is None: - raise TypeError('Dataset is not chunked') - + if not self.__chunk_init_check(): + return None + def convert_selection(tuple_of_slices): # while a slice of the form slice(a,b,None) is equivalent # in function to a slice of form (a,b,1) it is not the same. @@ -265,12 +273,13 @@ def convert_slice(aslice): @property def index(self): """ Direct access to the chunk index, if there is one. This is a ``pyfive`` API extension. """ - if self._index_params is None: - raise ValueError('No chunk index available for HDF layout class {self.layout}') - else: - if not self.__index_built: - self._build_index() - return self._index + # can't use init_chunk_check because that would be an infinite regression + if self.layout_class != 2: + raise TypeError("Data is not chunked") + if not self._index: + self._build_index() + return self._index + ##### This property is made available to help understand object store performance @property @@ -281,12 +290,8 @@ def btree_range(self): may be of use in understanding the read performance of chunked data in object stores. ``btree_range`` is a ``pyfive`` API extgension. """ - if self._index_params is None: - raise ValueError('No b-tree available for HDF layout class {self.layout}') - else: - if not self.__index_built: - self._build_index() - return (self._btree_start, self._btree_end) + self.__chunk_init_check() + return (self._btree_start, self._btree_end) #### The following method can be used to set pseudo chunking size after the #### file has been closed and before data transactions. This is pyfive specific diff --git a/tests/test_chunk_index_options.py b/tests/test_chunk_index_options.py index 1f6f8e6..5ce2872 100644 --- a/tests/test_chunk_index_options.py +++ b/tests/test_chunk_index_options.py @@ -1,14 +1,21 @@ -""" Test pyfive's abililty to read multidimensional datasets. 
""" +""" +Test pyfive's abililty to read multidimensional datasets +and variants of the chunk index accesses +""" import os import numpy as np from numpy.testing import assert_array_equal import pyfive +from pyfive.h5d import StoreInfo +import pytest DIRNAME = os.path.dirname(__file__) DATASET_CHUNKED_HDF5_FILE = os.path.join(DIRNAME, "data", 'chunked.hdf5') +NOT_CHUNKED_FILE = os.path.join(DIRNAME, "data", 'issue23_A_contiguous.nc') + def test_lazy_index(): @@ -49,6 +56,54 @@ def simplest_check(x,y): assert hfile.visititems(simplest_check) is None +def test_get_chunk_info_chunked(): + + # start lazy, then go real + + with pyfive.File(DATASET_CHUNKED_HDF5_FILE) as hfile: + + ds = hfile.get_lazy_view('dataset1') + assert ds.id._DatasetID__index_built==False + + si = StoreInfo((0,0), 0, 4016, 16) + info = ds.id.get_chunk_info(0) + assert info == si + + assert ds.id.get_num_chunks() == 88 + + assert ds.id.btree_range == (1072, 8680) + + +def test_get_chunk_methods_contiguous(): + + with pyfive.File(NOT_CHUNKED_FILE) as hfile: + + ds = hfile.get_lazy_view('q') + assert ds.id._DatasetID__index_built==False + + with pytest.raises(TypeError): + ds.id.get_chunk_info(0) + + with pytest.raises(TypeError): + ds.id.get_num_chunks() + + with pytest.raises(TypeError): + ds.id.read_direct_chunk(0) + + with pytest.raises(TypeError): + ds.id.btree_range + + + + + + + + + + + + From 3040c3d8d33ffcee3176ce06cb258d6add592d3d Mon Sep 17 00:00:00 2001 From: Bryan Lawrence Date: Tue, 28 Oct 2025 08:50:12 +0000 Subject: [PATCH 16/16] Better checking of chunk info testing answers, courtesy of @zequihg50 --- tests/test_chunk_index_options.py | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/tests/test_chunk_index_options.py b/tests/test_chunk_index_options.py index 5ce2872..3b9380a 100644 --- a/tests/test_chunk_index_options.py +++ b/tests/test_chunk_index_options.py @@ -10,6 +10,7 @@ import pyfive from pyfive.h5d import StoreInfo import pytest +import h5py DIRNAME = os.path.dirname(__file__) DATASET_CHUNKED_HDF5_FILE = os.path.join(DIRNAME, "data", 'chunked.hdf5') @@ -58,9 +59,13 @@ def simplest_check(x,y): def test_get_chunk_info_chunked(): - # start lazy, then go real + # Start lazy, then go real + # we think we know what the right answers are, so we hard + # code them as well as check that's what h5py would return - with pyfive.File(DATASET_CHUNKED_HDF5_FILE) as hfile: + with pyfive.File(DATASET_CHUNKED_HDF5_FILE) as hfile, \ + h5py.File(DATASET_CHUNKED_HDF5_FILE) as h5f, \ + open(DATASET_CHUNKED_HDF5_FILE, "rb") as f: ds = hfile.get_lazy_view('dataset1') assert ds.id._DatasetID__index_built==False @@ -68,10 +73,17 @@ def test_get_chunk_info_chunked(): si = StoreInfo((0,0), 0, 4016, 16) info = ds.id.get_chunk_info(0) assert info == si + assert h5f["dataset1"].id.get_chunk_info(0) == si assert ds.id.get_num_chunks() == 88 + assert h5f["dataset1"].id.get_num_chunks() == 88 assert ds.id.btree_range == (1072, 8680) + f.seek(1072) + assert f.read(4) == b"TREE" # only v1 btrees + f.seek(8680) + assert f.read(4) == b"TREE" # only v1 btrees + def test_get_chunk_methods_contiguous():