fix for traversing btrees of depth > 1

jpswinski · jpswinski · commit 6283370b4214 · 2026-02-05T17:49:09.000Z
diff --git a/clients/python/tests/test_h5p.py b/clients/python/tests/test_h5p.py
@@ -45,7 +45,6 @@ def test_invalid_dataset(self, init):
         assert init
         assert len(rsps) == 0
 
-
     def test_missing_invalid(self, init):
         resource = "ATL03_20181014012500_02350113_006_02.h5"
         r1 = h5.h5p([
@@ -139,3 +138,27 @@ def test_slice_invalid_ranges(self, init):
         # start > end
         rsps = h5.h5p([{"dataset": dataset, "slice": [[10, 5]]}], ATL06_FILE1, "icesat2")
         assert dataset not in rsps
+
+    def test_atl02(self, init):  # reads three ATL02 datasets in a single h5p request
+        datasets = [
+            {'dataset': "atlas/pce2/background/pce_mframe_cnt"},
+            {'dataset': "atlas/pce3/altimetry/delta_time"},
+            {'dataset': "/atlas/pce1/altimetry/strong/n_mf_ph"},  # this path is written with a leading '/', unlike the two above
+        ]
+        rsps = h5.h5p(datasets, "ATL02_20240930091206_02152507_007_01.h5", "icesat2")
+        assert init
+        for dataset in [row["dataset"] for row in datasets]:  # look each response up by the dataset string exactly as requested
+            assert dataset in rsps
+            assert len(rsps[dataset]) > 0  # at least one value came back
+            assert rsps[dataset][0] > 0  # first value is strictly positive
+
+    def test_atl08(self, init):  # reads a single ATL08 dataset through h5p
+        datasets = [
+            {'dataset': "gt2l/land_segments/dem_flag"},
+        ]
+        rsps = h5.h5p(datasets, "ATL08_20250911233934_13472812_007_01.h5", "icesat2")
+        assert init
+        for dataset in [row["dataset"] for row in datasets]:  # look each response up by the dataset string exactly as requested
+            assert dataset in rsps
+            assert len(rsps[dataset]) > 0  # at least one value came back
+            assert rsps[dataset][0] > 0  # first value is strictly positive
diff --git a/packages/core/package/MathLib.cpp b/packages/core/package/MathLib.cpp
@@ -577,6 +577,14 @@ on 1 byte), but shoehorning those bytes into integers efficiently is messy.
 #pragma GCC diagnostic push
 #pragma GCC diagnostic ignored "-Wimplicit-fallthrough"
 
+#ifndef __has_feature
+#define __has_feature(x) 0 /* fallback for compilers that do not provide __has_feature: every feature query evaluates to 0 */
+#endif
+
+#if defined(__SANITIZE_ADDRESS__) || __has_feature(address_sanitizer) /* true when either address-sanitizer indicator is present */
+#define VALGRIND /* switches the hash functions below to their "make valgrind happy" tail handling */
+#endif
+
 #ifdef __be__
 # define HASH_LITTLE_ENDIAN 0
 # define HASH_BIG_ENDIAN 1
@@ -739,6 +747,7 @@ uint32_t MathLib::hashlittle( const void *key, size_t length, uint32_t initval)
      * still catch it and complain.  The masking trick does make the hash
      * noticably faster for short strings (like English words).
      */
+#ifndef VALGRIND
 
     switch(length)
     {
@@ -757,6 +766,28 @@ uint32_t MathLib::hashlittle( const void *key, size_t length, uint32_t initval)
     case 0 : return c;              /* zero length strings require no mixing */
     }
 
+#else /* make valgrind happy */
+
+    const uint8_t *k8 = (const uint8_t *)k;
+    switch(length)
+    {
+    case 12: c+=k[2]; b+=k[1]; a+=k[0]; break;
+    case 11: c+=((uint32_t)k8[10])<<16;  /* fall through */
+    case 10: c+=((uint32_t)k8[9])<<8;    /* fall through */
+    case 9 : c+=k8[8];                   /* fall through */
+    case 8 : b+=k[1]; a+=k[0]; break;
+    case 7 : b+=((uint32_t)k8[6])<<16;   /* fall through */
+    case 6 : b+=((uint32_t)k8[5])<<8;    /* fall through */
+    case 5 : b+=k8[4];                   /* fall through */
+    case 4 : a+=k[0]; break;
+    case 3 : a+=((uint32_t)k8[2])<<16;   /* fall through */
+    case 2 : a+=((uint32_t)k8[1])<<8;    /* fall through */
+    case 1 : a+=k8[0]; break;
+    case 0 : return c;
+    }
+
+#endif /* !valgrind */
+
   } else if (HASH_LITTLE_ENDIAN && ((u.i & 0x1) == 0)) {
     const uint16_t *k = (const uint16_t *)key;         /* read 16-bit chunks */
     const uint8_t  *k8;
@@ -899,6 +930,7 @@ void MathLib::hashlittle2(
      * still catch it and complain.  The masking trick does make the hash
      * noticably faster for short strings (like English words).
      */
+#ifndef VALGRIND
 
     switch(length)
     {
@@ -917,6 +949,28 @@ void MathLib::hashlittle2(
     case 0 : *pc=c; *pb=b; return;  /* zero length strings require no mixing */
     }
 
+#else /* make valgrind happy */
+
+    const uint8_t *k8 = (const uint8_t *)k;
+    switch(length)
+    {
+    case 12: c+=k[2]; b+=k[1]; a+=k[0]; break;
+    case 11: c+=((uint32_t)k8[10])<<16;  /* fall through */
+    case 10: c+=((uint32_t)k8[9])<<8;    /* fall through */
+    case 9 : c+=k8[8];                   /* fall through */
+    case 8 : b+=k[1]; a+=k[0]; break;
+    case 7 : b+=((uint32_t)k8[6])<<16;   /* fall through */
+    case 6 : b+=((uint32_t)k8[5])<<8;    /* fall through */
+    case 5 : b+=k8[4];                   /* fall through */
+    case 4 : a+=k[0]; break;
+    case 3 : a+=((uint32_t)k8[2])<<16;   /* fall through */
+    case 2 : a+=((uint32_t)k8[1])<<8;    /* fall through */
+    case 1 : a+=k8[0]; break;
+    case 0 : *pc=c; *pb=b; return;  /* zero length strings require no mixing */
+    }
+
+#endif /* !valgrind */
+
   } else if (HASH_LITTLE_ENDIAN && ((u.i & 0x1) == 0)) {
     const uint16_t *k = (const uint16_t *)key;         /* read 16-bit chunks */
     const uint8_t  *k8;
@@ -1050,6 +1104,7 @@ uint32_t MathLib::hashbig( const void *key, size_t length, uint32_t initval)
      * still catch it and complain.  The masking trick does make the hash
      * noticably faster for short strings (like English words).
      */
+#ifndef VALGRIND
 
     switch(length)
     {
@@ -1068,6 +1123,28 @@ uint32_t MathLib::hashbig( const void *key, size_t length, uint32_t initval)
     case 0 : return c;              /* zero length strings require no mixing */
     }
 
+#else  /* make valgrind happy */
+
+    const uint8_t *k8 = (const uint8_t *)k;
+    switch(length)                   /* all the case statements fall through */
+    {
+    case 12: c+=k[2]; b+=k[1]; a+=k[0]; break;
+    case 11: c+=((uint32_t)k8[10])<<8;  /* fall through */
+    case 10: c+=((uint32_t)k8[9])<<16;  /* fall through */
+    case 9 : c+=((uint32_t)k8[8])<<24;  /* fall through */
+    case 8 : b+=k[1]; a+=k[0]; break;
+    case 7 : b+=((uint32_t)k8[6])<<8;   /* fall through */
+    case 6 : b+=((uint32_t)k8[5])<<16;  /* fall through */
+    case 5 : b+=((uint32_t)k8[4])<<24;  /* fall through */
+    case 4 : a+=k[0]; break;
+    case 3 : a+=((uint32_t)k8[2])<<8;   /* fall through */
+    case 2 : a+=((uint32_t)k8[1])<<16;  /* fall through */
+    case 1 : a+=((uint32_t)k8[0])<<24; break;
+    case 0 : return c;
+    }
+
+#endif /* !VALGRIND */
+
   } else {                        /* need to read the key one byte at a time */
     const uint8_t *k = (const uint8_t *)key;
 
diff --git a/packages/h5/package/H5Dataset.cpp b/packages/h5/package/H5Dataset.cpp
@@ -1642,22 +1642,7 @@ int H5Dataset::readNameIndex (uint64_t pos, const heap_info_t* heap_info)
 
     /* Initialize B-Tree Processing */
     const int dataset_path_length = StringLib::size(datasetPath[heap_info->dlvl]);
-    #ifndef __has_feature
-    #define __has_feature(x) 0
-    #endif
-    #if defined(__SANITIZE_ADDRESS__) || __has_feature(address_sanitizer)
-    if(H5CORO_ERROR_CHECKING)
-    {
-        if(dataset_path_length > STR_BUFF_SIZE)
-        {
-            throw RunTimeException(CRITICAL, RTE_FAILURE, "dataset path name <%s> too long: %d", datasetPath[heap_info->dlvl], dataset_path_length);
-        }
-    }
-    char dataset_path_buffer[STR_BUFF_SIZE];
-    StringLib::copy(dataset_path_buffer, datasetPath[heap_info->dlvl], dataset_path_length + 1);
-    #else
     const char* dataset_path_buffer = datasetPath[heap_info->dlvl];
-    #endif
     const index_info_t index_info = {
     #ifdef __be__
         .link_hash = MathLib::hashbig(dataset_path_buffer, dataset_path_length, 0),
@@ -1676,7 +1661,7 @@ int H5Dataset::readNameIndex (uint64_t pos, const heap_info_t* heap_info)
     }
 
     /* Traverse B-Tree */
-    pos += readNameIndexNode(root_node_address, heap_info, &index_info, num_records_in_root_node, 1);
+    pos += readNameIndexNode(root_node_address, heap_info, &index_info, num_records_in_root_node, 0);
 
     /* Return Bytes Read */
     return pos - starting_position;
@@ -2415,7 +2400,7 @@ int H5Dataset::readLinkInfoMsg (uint64_t pos, uint8_t hdr_flags, int dlvl)
         {
             for(const heap_t& address: heap_info.block_address_table)
             {
-                print2term("Block Address Table:                                             %lx, %lx, %d\n", address.logical, address.physical, address.size);
+                print2term("Block Address Table:                                             0x%lx, 0x%lx, %d\n", address.logical, address.physical, address.size);
             }
         }
         if(ioContext->options & H5Coro::Context::OPTION_USE_NAME_INDEX)