Skip to content

Commit 6283370

Browse files
committed
fix for traversing btrees of depth > 1
1 parent 388325b commit 6283370

File tree

3 files changed

+103
-18
lines changed

3 files changed

+103
-18
lines changed

clients/python/tests/test_h5p.py

Lines changed: 24 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,6 @@ def test_invalid_dataset(self, init):
4545
assert init
4646
assert len(rsps) == 0
4747

48-
4948
def test_missing_invalid(self, init):
5049
resource = "ATL03_20181014012500_02350113_006_02.h5"
5150
r1 = h5.h5p([
@@ -139,3 +138,27 @@ def test_slice_invalid_ranges(self, init):
139138
# start > end
140139
rsps = h5.h5p([{"dataset": dataset, "slice": [[10, 5]]}], ATL06_FILE1, "icesat2")
141140
assert dataset not in rsps
141+
142+
def test_atl02(self, init):
143+
datasets = [
144+
{'dataset': "atlas/pce2/background/pce_mframe_cnt"},
145+
{'dataset': "atlas/pce3/altimetry/delta_time"},
146+
{'dataset': "/atlas/pce1/altimetry/strong/n_mf_ph"},
147+
]
148+
rsps = h5.h5p(datasets, "ATL02_20240930091206_02152507_007_01.h5", "icesat2")
149+
assert init
150+
for dataset in [row["dataset"] for row in datasets]:
151+
assert dataset in rsps
152+
assert len(rsps[dataset]) > 0
153+
assert rsps[dataset][0] > 0
154+
155+
def test_atl08(self, init):
156+
datasets = [
157+
{'dataset': "gt2l/land_segments/dem_flag"},
158+
]
159+
rsps = h5.h5p(datasets, "ATL08_20250911233934_13472812_007_01.h5", "icesat2")
160+
assert init
161+
for dataset in [row["dataset"] for row in datasets]:
162+
assert dataset in rsps
163+
assert len(rsps[dataset]) > 0
164+
assert rsps[dataset][0] > 0

packages/core/package/MathLib.cpp

Lines changed: 77 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -577,6 +577,14 @@ on 1 byte), but shoehorning those bytes into integers efficiently is messy.
577577
#pragma GCC diagnostic push
578578
#pragma GCC diagnostic ignored "-Wimplicit-fallthrough"
579579

580+
#ifndef __has_feature
581+
#define __has_feature(x) 0
582+
#endif
583+
584+
#if defined(__SANITIZE_ADDRESS__) || __has_feature(address_sanitizer)
585+
#define VALGRIND
586+
#endif
587+
580588
#ifdef __be__
581589
# define HASH_LITTLE_ENDIAN 0
582590
# define HASH_BIG_ENDIAN 1
@@ -739,6 +747,7 @@ uint32_t MathLib::hashlittle( const void *key, size_t length, uint32_t initval)
739747
* still catch it and complain. The masking trick does make the hash
740748
* noticeably faster for short strings (like English words).
741749
*/
750+
#ifndef VALGRIND
742751

743752
switch(length)
744753
{
@@ -757,6 +766,28 @@ uint32_t MathLib::hashlittle( const void *key, size_t length, uint32_t initval)
757766
case 0 : return c; /* zero length strings require no mixing */
758767
}
759768

769+
#else /* make valgrind happy */
770+
771+
const uint8_t *k8 = (const uint8_t *)k;
772+
switch(length)
773+
{
774+
case 12: c+=k[2]; b+=k[1]; a+=k[0]; break;
775+
case 11: c+=((uint32_t)k8[10])<<16; /* fall through */
776+
case 10: c+=((uint32_t)k8[9])<<8; /* fall through */
777+
case 9 : c+=k8[8]; /* fall through */
778+
case 8 : b+=k[1]; a+=k[0]; break;
779+
case 7 : b+=((uint32_t)k8[6])<<16; /* fall through */
780+
case 6 : b+=((uint32_t)k8[5])<<8; /* fall through */
781+
case 5 : b+=k8[4]; /* fall through */
782+
case 4 : a+=k[0]; break;
783+
case 3 : a+=((uint32_t)k8[2])<<16; /* fall through */
784+
case 2 : a+=((uint32_t)k8[1])<<8; /* fall through */
785+
case 1 : a+=k8[0]; break;
786+
case 0 : return c;
787+
}
788+
789+
#endif /* !valgrind */
790+
760791
} else if (HASH_LITTLE_ENDIAN && ((u.i & 0x1) == 0)) {
761792
const uint16_t *k = (const uint16_t *)key; /* read 16-bit chunks */
762793
const uint8_t *k8;
@@ -899,6 +930,7 @@ void MathLib::hashlittle2(
899930
* still catch it and complain. The masking trick does make the hash
900931
* noticeably faster for short strings (like English words).
901932
*/
933+
#ifndef VALGRIND
902934

903935
switch(length)
904936
{
@@ -917,6 +949,28 @@ void MathLib::hashlittle2(
917949
case 0 : *pc=c; *pb=b; return; /* zero length strings require no mixing */
918950
}
919951

952+
#else /* make valgrind happy */
953+
954+
const uint8_t *k8 = (const uint8_t *)k;
955+
switch(length)
956+
{
957+
case 12: c+=k[2]; b+=k[1]; a+=k[0]; break;
958+
case 11: c+=((uint32_t)k8[10])<<16; /* fall through */
959+
case 10: c+=((uint32_t)k8[9])<<8; /* fall through */
960+
case 9 : c+=k8[8]; /* fall through */
961+
case 8 : b+=k[1]; a+=k[0]; break;
962+
case 7 : b+=((uint32_t)k8[6])<<16; /* fall through */
963+
case 6 : b+=((uint32_t)k8[5])<<8; /* fall through */
964+
case 5 : b+=k8[4]; /* fall through */
965+
case 4 : a+=k[0]; break;
966+
case 3 : a+=((uint32_t)k8[2])<<16; /* fall through */
967+
case 2 : a+=((uint32_t)k8[1])<<8; /* fall through */
968+
case 1 : a+=k8[0]; break;
969+
case 0 : *pc=c; *pb=b; return; /* zero length strings require no mixing */
970+
}
971+
972+
#endif /* !valgrind */
973+
920974
} else if (HASH_LITTLE_ENDIAN && ((u.i & 0x1) == 0)) {
921975
const uint16_t *k = (const uint16_t *)key; /* read 16-bit chunks */
922976
const uint8_t *k8;
@@ -1050,6 +1104,7 @@ uint32_t MathLib::hashbig( const void *key, size_t length, uint32_t initval)
10501104
* still catch it and complain. The masking trick does make the hash
10511105
* noticeably faster for short strings (like English words).
10521106
*/
1107+
#ifndef VALGRIND
10531108

10541109
switch(length)
10551110
{
@@ -1068,6 +1123,28 @@ uint32_t MathLib::hashbig( const void *key, size_t length, uint32_t initval)
10681123
case 0 : return c; /* zero length strings require no mixing */
10691124
}
10701125

1126+
#else /* make valgrind happy */
1127+
1128+
const uint8_t *k8 = (const uint8_t *)k;
1129+
switch(length) /* all the case statements fall through */
1130+
{
1131+
case 12: c+=k[2]; b+=k[1]; a+=k[0]; break;
1132+
case 11: c+=((uint32_t)k8[10])<<8; /* fall through */
1133+
case 10: c+=((uint32_t)k8[9])<<16; /* fall through */
1134+
case 9 : c+=((uint32_t)k8[8])<<24; /* fall through */
1135+
case 8 : b+=k[1]; a+=k[0]; break;
1136+
case 7 : b+=((uint32_t)k8[6])<<8; /* fall through */
1137+
case 6 : b+=((uint32_t)k8[5])<<16; /* fall through */
1138+
case 5 : b+=((uint32_t)k8[4])<<24; /* fall through */
1139+
case 4 : a+=k[0]; break;
1140+
case 3 : a+=((uint32_t)k8[2])<<8; /* fall through */
1141+
case 2 : a+=((uint32_t)k8[1])<<16; /* fall through */
1142+
case 1 : a+=((uint32_t)k8[0])<<24; break;
1143+
case 0 : return c;
1144+
}
1145+
1146+
#endif /* !VALGRIND */
1147+
10711148
} else { /* need to read the key one byte at a time */
10721149
const uint8_t *k = (const uint8_t *)key;
10731150

packages/h5/package/H5Dataset.cpp

Lines changed: 2 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -1642,22 +1642,7 @@ int H5Dataset::readNameIndex (uint64_t pos, const heap_info_t* heap_info)
16421642

16431643
/* Initialize B-Tree Processing */
16441644
const int dataset_path_length = StringLib::size(datasetPath[heap_info->dlvl]);
1645-
#ifndef __has_feature
1646-
#define __has_feature(x) 0
1647-
#endif
1648-
#if defined(__SANITIZE_ADDRESS__) || __has_feature(address_sanitizer)
1649-
if(H5CORO_ERROR_CHECKING)
1650-
{
1651-
if(dataset_path_length > STR_BUFF_SIZE)
1652-
{
1653-
throw RunTimeException(CRITICAL, RTE_FAILURE, "dataset path name <%s> too long: %d", datasetPath[heap_info->dlvl], dataset_path_length);
1654-
}
1655-
}
1656-
char dataset_path_buffer[STR_BUFF_SIZE];
1657-
StringLib::copy(dataset_path_buffer, datasetPath[heap_info->dlvl], dataset_path_length + 1);
1658-
#else
16591645
const char* dataset_path_buffer = datasetPath[heap_info->dlvl];
1660-
#endif
16611646
const index_info_t index_info = {
16621647
#ifdef __be__
16631648
.link_hash = MathLib::hashbig(dataset_path_buffer, dataset_path_length, 0),
@@ -1676,7 +1661,7 @@ int H5Dataset::readNameIndex (uint64_t pos, const heap_info_t* heap_info)
16761661
}
16771662

16781663
/* Traverse B-Tree */
1679-
pos += readNameIndexNode(root_node_address, heap_info, &index_info, num_records_in_root_node, 1);
1664+
pos += readNameIndexNode(root_node_address, heap_info, &index_info, num_records_in_root_node, 0);
16801665

16811666
/* Return Bytes Read */
16821667
return pos - starting_position;
@@ -2415,7 +2400,7 @@ int H5Dataset::readLinkInfoMsg (uint64_t pos, uint8_t hdr_flags, int dlvl)
24152400
{
24162401
for(const heap_t& address: heap_info.block_address_table)
24172402
{
2418-
print2term("Block Address Table: %lx, %lx, %d\n", address.logical, address.physical, address.size);
2403+
print2term("Block Address Table: 0x%lx, 0x%lx, %d\n", address.logical, address.physical, address.size);
24192404
}
24202405
}
24212406
if(ioContext->options & H5Coro::Context::OPTION_USE_NAME_INDEX)

0 commit comments

Comments
 (0)