From ddbfc7ead2378cd8d7ccf0ff23f57907f29a800e Mon Sep 17 00:00:00 2001
From: ssomnath <14300780+ssomnath@users.noreply.github.com>
Date: Thu, 29 Oct 2020 11:37:50 -0400
Subject: [PATCH 1/5] make_indices_matrix now accepts int and 0-dim np arrays

---
 pyUSID/io/write_utils.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/pyUSID/io/write_utils.py b/pyUSID/io/write_utils.py
index e63ac63b..4d665538 100644
--- a/pyUSID/io/write_utils.py
+++ b/pyUSID/io/write_utils.py
@@ -227,8 +227,12 @@ def make_indices_matrix(num_steps, is_position=True):
     indices_matrix : 2D unsigned int numpy array
         arranged as [steps, spatial dimension]
     """
+    if isinstance(num_steps, int):
+        num_steps = list(num_steps)
     if not isinstance(num_steps, (tuple, list, np.ndarray)):
         raise TypeError('num_steps should be a list / tuple / numpy array')
+    if isinstance(num_steps, np.ndarray) and num_steps.ndim < 1:
+        num_steps = np.expand_dims(num_steps, 0)
     if not contains_integers(num_steps, min_val=1 + int(len(num_steps) > 0)):
         raise ValueError('num_steps should contain integers greater than or equal to 1 (empty dimension) or 2')
 

From 42f7e1622c2487f60551dca804dce88ec5359e01 Mon Sep 17 00:00:00 2001
From: ssomnath <14300780+ssomnath@users.noreply.github.com>
Date: Tue, 3 Nov 2020 11:25:28 -0500
Subject: [PATCH 2/5] make_indices_matrix now accepts int and unit dimensions

---
 pyUSID/io/write_utils.py     | 13 +++++++++----
 tests/io/test_write_utils.py | 23 +++++++++++++++++++----
 2 files changed, 28 insertions(+), 8 deletions(-)

diff --git a/pyUSID/io/write_utils.py b/pyUSID/io/write_utils.py
index 4d665538..d865d9f5 100644
--- a/pyUSID/io/write_utils.py
+++ b/pyUSID/io/write_utils.py
@@ -215,7 +215,7 @@ def make_indices_matrix(num_steps, is_position=True):
 
     Parameters
     ------------
-    num_steps : List / numpy array
+    num_steps : List / numpy array / int
         Number of steps in each spatial or spectral dimension
         Note that the axes must be ordered from fastest varying to slowest varying
     is_position : bool, optional, default = True
@@ -228,13 +228,18 @@ def make_indices_matrix(num_steps, is_position=True):
         arranged as [steps, spatial dimension]
     """
     if isinstance(num_steps, int):
-        num_steps = list(num_steps)
+        num_steps = [num_steps]
     if not isinstance(num_steps, (tuple, list, np.ndarray)):
         raise TypeError('num_steps should be a list / tuple / numpy array')
     if isinstance(num_steps, np.ndarray) and num_steps.ndim < 1:
         num_steps = np.expand_dims(num_steps, 0)
-    if not contains_integers(num_steps, min_val=1 + int(len(num_steps) > 0)):
-        raise ValueError('num_steps should contain integers greater than or equal to 1 (empty dimension) or 2')
+    if len(num_steps) == 0:
+        raise ValueError('num_steps should not be an empty array or list')
+    if len(num_steps) == 1 and num_steps[0] == 1:
+        num_steps = [1]
+    elif not contains_integers(num_steps, min_val=1 + int(len(num_steps) > 0)):
+        raise ValueError('num_steps should contain integers greater than or equal'
+                         ' to 1 (empty dimension) or 2')
 
     num_steps = np.array(num_steps)
     spat_dims = max(1, len(np.where(num_steps > 1)[0]))
diff --git a/tests/io/test_write_utils.py b/tests/io/test_write_utils.py
index bb9a9505..5ec56072 100644
--- a/tests/io/test_write_utils.py
+++ b/tests/io/test_write_utils.py
@@ -22,14 +22,29 @@ def test_dim_w_val_1(self):
         with self.assertRaises(ValueError):
             _ = write_utils.make_indices_matrix([1, 2, 3])
 
+    def test_just_size_of_one_dim(self):
+        expected = np.expand_dims(np.arange(4), axis=0)
+        ret_val = write_utils.make_indices_matrix(4, is_position=False)
+        self.assertTrue(np.allclose(expected, 
ret_val)) + + def test_empty_list(self): + with self.assertRaises(ValueError): + _ = write_utils.make_indices_matrix([]) + + def test_single_value_dimension_int_input(self): + expected = np.expand_dims(np.arange(1), axis=0) + ret_val = write_utils.make_indices_matrix(1, is_position=False) + self.assertTrue(np.allclose(expected, ret_val)) + + def test_single_value_dimension_list_input(self): + expected = np.expand_dims(np.arange(1), axis=0) + ret_val = write_utils.make_indices_matrix([1], is_position=False) + self.assertTrue(np.allclose(expected, ret_val)) + def test_non_int_dim_sizes(self): with self.assertRaises(ValueError): _ = write_utils.make_indices_matrix([1.233, 2.4, 3]) - def test_not_list(self): - with self.assertRaises(TypeError): - _ = write_utils.make_indices_matrix(1) - def test_weird_inputs(self): with self.assertRaises(ValueError): _ = write_utils.make_indices_matrix([2, 'hello', 3]) From 961c1bb80669253dd1a929eade5b364f3bf9c54c Mon Sep 17 00:00:00 2001 From: ssomnath <14300780+ssomnath@users.noreply.github.com> Date: Thu, 5 Nov 2020 15:51:58 -0500 Subject: [PATCH 3/5] Removing code already present in sidpy --- pyUSID/io/reg_ref.py | 529 +------------------------------------------ 1 file changed, 1 insertion(+), 528 deletions(-) diff --git a/pyUSID/io/reg_ref.py b/pyUSID/io/reg_ref.py index def038a2..54509bc3 100644 --- a/pyUSID/io/reg_ref.py +++ b/pyUSID/io/reg_ref.py @@ -7,539 +7,14 @@ from __future__ import division, print_function, absolute_import, unicode_literals import sys -from collections import Iterable -from warnings import warn import h5py -import numpy as np -from sidpy.base.string_utils import clean_string_att +from sidpy.hdf.reg_ref import * from .hdf_utils import check_if_main if sys.version_info.major == 3: unicode = str -__all__ = ['get_region', 'clean_reg_ref', 'attempt_reg_ref_build', 'copy_reg_ref_reduced_dim', - 'create_region_reference', 'get_indices_for_region_ref', 'simple_region_ref_copy', 'write_region_references'] - - -def get_region(h5_dset, reg_ref_name): - """ - Gets the region in a dataset specified by a region reference - - Parameters - ---------- - h5_dset : h5py.Dataset - Dataset containing the region reference - reg_ref_name : str / unicode - Name of the region reference - - Returns - ------- - value : np.ndarray - Data specified by the region reference. Note that a squeeze is applied by default. - """ - warn('pyUSID.io.reg.ref.get_region will be moved to pyNSID in the next ' - 'pyUSID version.', FutureWarning) - if not isinstance(reg_ref_name, (str, unicode)): - raise TypeError('reg_ref_name should be a string') - if not isinstance(h5_dset, h5py.Dataset): - raise TypeError('h5_dset should be of type h5py.Dataset') - # this may raise KeyErrors. Let it - reg_ref = h5_dset.attrs[reg_ref_name] - return np.squeeze(h5_dset[reg_ref]) - - -def clean_reg_ref(h5_dset, reg_ref_tuple, verbose=False): - """ - Makes sure that the provided instructions for a region reference are indeed valid - This method has become necessary since h5py allows the writing of region references larger than the maxshape - - Parameters - ---------- - h5_dset : h5.Dataset instance - Dataset to which region references will be added as attributes - reg_ref_tuple : list / tuple - The slicing information formatted using tuples of slice objects. - verbose : Boolean (Optional. 
Default = False) - Whether or not to print status messages - - Returns - ------- - new_reg_refs : tuple - Instructions for the corrected region reference - """ - warn('pyUSID.io.reg.ref.clean_reg_ref will be moved to pyNSID in the next ' - 'pyUSID version.', FutureWarning) - if not isinstance(reg_ref_tuple, (tuple, dict, slice)): - raise TypeError('slices should be a tuple, list, or slice but is instead of type ' - '{}'.format(type(reg_ref_tuple))) - if not isinstance(h5_dset, h5py.Dataset): - raise TypeError('h5_dset should be a h5py.Dataset object but is instead of type ' - '{}'.format(type(h5_dset))) - - if isinstance(reg_ref_tuple, slice): - # 1D dataset - reg_ref_tuple = [reg_ref_tuple] - - if len(reg_ref_tuple) != len(h5_dset.shape): - raise ValueError('Region reference tuple did not have the same dimensions as the h5 dataset') - - if verbose: - print('Comparing {} with h5 dataset maxshape of {}'.format(reg_ref_tuple, h5_dset.maxshape)) - - new_reg_refs = list() - - for reg_ref_slice, max_size in zip(reg_ref_tuple, h5_dset.maxshape): - if not isinstance(reg_ref_slice, slice): - raise TypeError('slices should be a tuple or a list but is instead of type ' - '{}'.format(type(reg_ref_slice))) - - # For now we will simply make sure that the end of the slice is <= maxshape - if max_size is not None and reg_ref_slice.stop is not None: - reg_ref_slice = slice(reg_ref_slice.start, min(reg_ref_slice.stop, max_size), reg_ref_slice.step) - - new_reg_refs.append(reg_ref_slice) - - if verbose: - print('Region reference tuple now: {}'.format(new_reg_refs)) - - return tuple(new_reg_refs) - - -def attempt_reg_ref_build(h5_dset, dim_names, verbose=False): - """ - - Parameters - ---------- - h5_dset : h5.Dataset instance - Dataset to which region references need to be added as attributes - dim_names : list or tuple - List of the names of the region references (typically names of dimensions) - verbose : bool, optional. Default=False - Whether or not to print debugging statements - - Returns - ------- - labels_dict : dict - The slicing information must be formatted using tuples of slice objects. 
- For example {'region_1':(slice(None, None), slice (0,1))} - """ - warn('pyUSID.io.reg.ref.attempt_reg_ref_build will be moved to pyNSID in the next ' - 'pyUSID version.', FutureWarning) - if not isinstance(h5_dset, h5py.Dataset): - raise TypeError('h5_dset should be a h5py.Dataset object but is instead of type ' - '{}.'.format(type(h5_dset))) - if not isinstance(dim_names, (list, tuple)): - raise TypeError('slices should be a list or tuple but is instead of type ' - '{}'.format(type(dim_names))) - - if len(h5_dset.shape) != 2: - return dict() - - if not np.all([isinstance(obj, (str, unicode)) for obj in dim_names]): - raise TypeError('Unable to automatically generate region references for dataset: {} since one or more names' - ' of the region references was not a string'.format(h5_dset.name)) - - labels_dict = dict() - if len(dim_names) == h5_dset.shape[0]: - if verbose: - print('Most likely a spectroscopic indices / values dataset') - for dim_index, curr_name in enumerate(dim_names): - labels_dict[curr_name] = (slice(dim_index, dim_index + 1), slice(None)) - elif len(dim_names) == h5_dset.shape[1]: - if verbose: - print('Most likely a position indices / values dataset') - for dim_index, curr_name in enumerate(dim_names): - labels_dict[curr_name] = (slice(None), slice(dim_index, dim_index + 1)) - - if len(labels_dict) > 0: - warn('Attempted to automatically build region reference dictionary for dataset: {}.\n' - 'Please specify region references as a tuple of slice objects for each attribute'.format(h5_dset.name)) - else: - if verbose: - print('Could not build region references since dataset had shape:{} and number of region references is ' - '{}'.format(h5_dset.shape, len(dim_names))) - return labels_dict - - -def get_indices_for_region_ref(h5_main, ref, return_method='slices'): - """ - Given an hdf5 region reference and the dataset it refers to, - return an array of indices within that dataset that - correspond to the reference. - - Parameters - ---------- - h5_main : HDF5 Dataset - dataset that the reference can be returned from - ref : HDF5 Region Reference - Region reference object - return_method : {'slices', 'corners', 'points'} - slices : the reference is return as pairs of slices - - corners : the reference is returned as pairs of corners representing - the starting and ending indices of each block - - points : the reference is returns as a list of tuples of points - - Returns - ------- - ref_inds : Numpy Array - array of indices in the source dataset that ref accesses - - """ - warn('pyUSID.io.reg.ref.get_indices_for_region_ref will be moved to pyNSID in the next ' - 'pyUSID version.', FutureWarning) - if not isinstance(h5_main, h5py.Dataset): - raise TypeError('h5_main should be a h5py.Dataset object') - if not isinstance(ref, h5py.RegionReference): - raise TypeError('ref should be a h5py.RegionReference object') - if return_method is not None: - if not isinstance(return_method, (str, unicode)): - raise TypeError('return_method should be a string') - - if return_method == 'points': - def __corners_to_point_array(start, stop): - """ - Convert a pair of tuples representing two opposite corners of an HDF5 region reference - into a list of arrays for each dimension. 
- - Parameters - ---------- - start : Tuple - the starting indices of the region - stop : Tuple - the final indices of the region - - Returns - ------- - inds : Tuple of arrays - the list of points in each dimension - - """ - ranges = [] - for i in range(len(start)): - if start[i] == stop[i]: - ranges.append([stop[i]]) - else: - ranges.append(np.arange(start[i], stop[i] + 1, dtype=np.uint)) - grid = np.meshgrid(*ranges, indexing='ij') - - ref_inds = np.asarray(zip(*(x.flat for x in grid))) - - return ref_inds - - return_func = __corners_to_point_array - elif return_method == 'corners': - def __corners_to_corners(start, stop): - return start, stop - - return_func = __corners_to_corners - elif return_method == 'slices': - def __corners_to_slices(start, stop): - """ - Convert a pair of tuples representing two opposite corners of an HDF5 region reference - into a pair of slices. - - Parameters - ---------- - start : Tuple - the starting indices of the region - stop : Tuple - the final indices of the region - - Returns - ------- - slices : list - pair of slices representing the region - - """ - slices = [] - for idim in range(len(start)): - slices.append(slice(start[idim], stop[idim])) - - return slices - - return_func = __corners_to_slices - - region = h5py.h5r.get_region(ref, h5_main.id) - reg_type = region.get_select_type() - if reg_type == 2: - """ - Reference is hyperslabs - """ - ref_inds = [] - for start, end in region.get_select_hyper_blocklist(): - ref_inds.append(return_func(start, end)) - ref_inds = np.array(ref_inds).reshape(-1, len(start)) - - elif reg_type == 3: - """ - Reference is single block - """ - start, end = region.get_select_bounds() - - ref_inds = return_func(start, end) - else: - warn('No method currently exists for converting this type of reference.') - ref_inds = np.empty(0) - - return ref_inds - - -def copy_reg_ref_reduced_dim(h5_source, h5_target, h5_source_inds, h5_target_inds, key): - """ - Copies a region reference from one dataset to another taking into account that a dimension - has been lost from source to target - - Parameters - ---------- - h5_source : HDF5 Dataset - source dataset for region reference copy - h5_target : HDF5 Dataset - target dataset for region reference copy - h5_source_inds : HDF5 Dataset - indices of each dimension of the h5_source dataset - h5_target_inds : HDF5 Dataset - indices of each dimension of the h5_target dataset - key : String - Name of attribute in h5_source that contains - the Region Reference to copy - - Returns - ------- - ref_inds : Nx2x2 array of unsigned integers - Array containing pairs of points that define - the corners of each hyperslab in the region - reference - - """ - warn('pyUSID.io.reg.ref.copy_reg_ref_reduced_dim will be moved to pyNSID in the next ' - 'pyUSID version.', FutureWarning) - for param, param_name in zip([h5_source, h5_target, h5_source_inds, h5_target_inds], - ['h5_source', 'h5_target', 'h5_source_inds', 'h5_target_inds']): - if not isinstance(param, h5py.Dataset): - raise TypeError(param_name + ' should be a h5py.Dataset object') - if not isinstance(key, (str, unicode)): - raise TypeError('key should be a string') - key = key.strip() - - ''' - Determine which dimension is missing from the target - ''' - lost_dim = [] - for dim in h5_source_inds.attrs['labels']: - if dim not in h5_target_inds.attrs['labels']: - lost_dim.append(np.where(h5_source_inds.attrs['labels'] == dim)[0]) - ref = h5_source.attrs[key] - ref_inds = get_indices_for_region_ref(h5_source, ref, return_method='corners') - ''' - 
Convert to proper spectroscopic dimensions - First is special case for a region reference that spans the entire dataset - ''' - if len(ref_inds.shape) == 2 and all(ref_inds[0] == [0, 0]) and all(ref_inds[1] + 1 == h5_source.shape): - ref_inds[1, 1] = h5_target.shape[1] - 1 - ref_inds = np.expand_dims(ref_inds, 0) - else: - ''' - More common case of reference made of hyperslabs - ''' - spec_ind_zeroes = np.where(h5_source_inds[lost_dim] == 0)[1] - - ref_inds = ref_inds.reshape([-1, 2, 2]) - - for start, stop in ref_inds[:-1]: - start[1] = np.where(start[1] == spec_ind_zeroes)[0] - stop[1] = np.where(stop[1] == spec_ind_zeroes - 1)[0] - 1 - - ref_inds[-1, 0, 1] = np.where(ref_inds[-1, 0, 1] == spec_ind_zeroes)[0] - stop = np.where(ref_inds[-1, 1, 1] == spec_ind_zeroes - 1)[0] - if stop.size == 0: - stop = len(spec_ind_zeroes) - ref_inds[-1, 1, 1] = stop - 1 - ''' - Create the new reference from the indices - ''' - h5_target.attrs[key] = create_region_reference(h5_target, ref_inds) - - return ref_inds - - -def create_region_reference(h5_main, ref_inds): - """ - Create a region reference in the destination dataset using an iterable of pairs of indices - representing the start and end points of a hyperslab block - - Parameters - ---------- - h5_main : HDF5 dataset - dataset the region will be created in - ref_inds : Iterable - index pairs, [start indices, final indices] for each block in the - hyperslab - - Returns - ------- - new_ref : HDF5 Region reference - reference in `h5_main` for the blocks of points defined by `ref_inds` - - """ - warn('pyUSID.io.reg.ref.create_region_reference will be moved to pyNSID ' - 'in the next pyUSID version.', FutureWarning) - if not isinstance(h5_main, h5py.Dataset): - raise TypeError('h5_main should be a h5py.Dataset object') - if not isinstance(ref_inds, Iterable): - raise TypeError('ref_inds should be a list or tuple') - - h5_space = h5_main.id.get_space() - h5_space.select_none() - - for start, stop in ref_inds: - block = stop - start + 1 - h5_space.select_hyperslab(tuple(start), (1, 1), block=tuple(block), op=1) - - if not h5_space.select_valid(): - warn('Could not create new region reference.') - return None - new_ref = h5py.h5r.create(h5_main.id, b'.', h5py.h5r.DATASET_REGION, space=h5_space) - - return new_ref - - -def simple_region_ref_copy(h5_source, h5_target, key): - """ - Copies a region reference from one dataset to another - without alteration - - Parameters - ---------- - h5_source : HDF5 Dataset - source dataset for region reference copy - h5_target : HDF5 Dataset - target dataset for region reference copy - key : String - Name of attribute in h5_source that contains - the Region Reference to copy - - Returns - ------- - ref_inds : Nx2x2 array of unsigned integers - Array containing pairs of points that define - the corners of each hyperslab in the region - reference - - """ - warn('pyUSID.io.reg.ref.simple_region_ref_copy will be moved to pyNSID ' - 'in the next pyUSID version.', FutureWarning) - for param, param_name in zip([h5_source, h5_target], ['h5_source', 'h5_target']): - if not isinstance(param, h5py.Dataset): - raise TypeError(param_name + ' should be a h5py.Dataset object') - if not isinstance(key, (str, unicode)): - raise TypeError('key should be a string') - - ref = h5_source.attrs[key] - ref_inds = get_indices_for_region_ref(h5_source, ref, return_method='corners') - ref_inds = ref_inds.reshape([-1, 2, 2]) - ref_inds[:, 1, 1] = h5_target.shape[1] - 1 - target_ref = create_region_reference(h5_target, ref_inds) - 
h5_target.attrs[key] = target_ref - return ref_inds - - -def copy_all_region_refs(h5_source, h5_target): - """ - Copies only region references from the source dataset to the target dataset - - Parameters - ---------- - h5_source : h5py.Dataset - Dataset from which to copy region references - h5_target : h5py.Dataset - Dataset to which to copy region references to - - """ - warn('pyUSID.io.reg.ref.copy_all_region_refs will be moved to pyNSID ' - 'in the next pyUSID version.', FutureWarning) - if not isinstance(h5_source, h5py.Dataset): - raise TypeError("'h5_source' should be a h5py.Dataset object") - if not isinstance(h5_target, h5py.Dataset): - raise TypeError("'h5_target' should be a h5py.Dataset object") - for key in h5_source.attrs.keys(): - if not isinstance(h5_source.attrs[key], h5py.RegionReference): - continue - simple_region_ref_copy(h5_source, h5_target, key) - - -def write_region_references(h5_dset, reg_ref_dict, add_labels_attr=True, verbose=False): - """ - Creates attributes of a h5py.Dataset that refer to regions in the dataset - - Parameters - ---------- - h5_dset : h5.Dataset instance - Dataset to which region references will be added as attributes - reg_ref_dict : dict - The slicing information must be formatted using tuples of slice objects. - For example {'region_1':(slice(None, None), slice (0,1))} - add_labels_attr : bool, optional, default = True - Whether or not to write an attribute named 'labels' with the - verbose : Boolean (Optional. Default = False) - Whether or not to print status messages - """ - warn('pyUSID.io.reg.ref.write_region_references will be moved to pyNSID ' - 'in the next pyUSID version.', FutureWarning) - if not isinstance(reg_ref_dict, dict): - raise TypeError('slices should be a dictionary but is instead of type ' - '{}'.format(type(reg_ref_dict))) - if not isinstance(h5_dset, h5py.Dataset): - raise TypeError('h5_dset should be a h5py.Dataset object but is instead of type ' - '{}'.format(type(h5_dset))) - - if verbose: - print('Starting to write Region References to Dataset', h5_dset.name, 'of shape:', h5_dset.shape) - for reg_ref_name, reg_ref_tuple in reg_ref_dict.items(): - if verbose: - print('About to write region reference:', reg_ref_name, ':', reg_ref_tuple) - - reg_ref_tuple = clean_reg_ref(h5_dset, reg_ref_tuple, verbose=verbose) - - h5_dset.attrs[reg_ref_name] = h5_dset.regionref[reg_ref_tuple] - - if verbose: - print('Wrote Region Reference:%s' % reg_ref_name) - - ''' - Next, write these label names as an attribute called labels - Now make an attribute called 'labels' that is a list of strings - First ascertain the dimension of the slicing: - ''' - if add_labels_attr: - found_dim = False - dimen_index = None - - for key, val in reg_ref_dict.items(): - if not isinstance(val, (list, tuple)): - reg_ref_dict[key] = [val] - - for dimen_index, slice_obj in enumerate(list(reg_ref_dict.values())[0]): - # We make the assumption that checking the start is sufficient - if slice_obj.start is not None: - found_dim = True - break - if found_dim: - headers = [None] * len(reg_ref_dict) # The list that will hold all the names - for col_name in reg_ref_dict.keys(): - headers[reg_ref_dict[col_name][dimen_index].start] = col_name - if verbose: - print('Writing header attributes: {}'.format('labels')) - # Now write the list of col / row names as an attribute: - h5_dset.attrs['labels'] = clean_string_att(headers) - else: - warn('Unable to write region references for %s' % (h5_dset.name.split('/')[-1])) - - if verbose: - print('Wrote Region References of 
Dataset %s' % (h5_dset.name.split('/')[-1])) - def copy_region_refs(h5_source, h5_target): """ @@ -557,8 +32,6 @@ def copy_region_refs(h5_source, h5_target): ''' Check both h5_source and h5_target to ensure that are Main ''' - warn('pyUSID.io.reg.ref.copy_region_refs will be moved to pyNSID ' - 'in the next pyUSID version.', FutureWarning) are_main = all([check_if_main(h5_source), check_if_main(h5_target)]) if not all([isinstance(h5_source, h5py.Dataset), isinstance(h5_target, h5py.Dataset)]): raise TypeError('Inputs to copy_region_refs must be HDF5 Datasets') From ff365684d1badf7bf613285bf3ee0dfb532a6800 Mon Sep 17 00:00:00 2001 From: ssomnath <14300780+ssomnath@users.noreply.github.com> Date: Thu, 5 Nov 2020 15:54:21 -0500 Subject: [PATCH 4/5] Removing code already present in sidpy --- tests/io/test_reg_ref.py | 314 +-------------------------------------- 1 file changed, 2 insertions(+), 312 deletions(-) diff --git a/tests/io/test_reg_ref.py b/tests/io/test_reg_ref.py index d54f0342..3ee7a753 100644 --- a/tests/io/test_reg_ref.py +++ b/tests/io/test_reg_ref.py @@ -24,326 +24,16 @@ class TestRegRef(unittest.TestCase): - @staticmethod - def __delete_existing_file(file_path): - if os.path.exists(file_path): - os.remove(file_path) - - @staticmethod - def __write_safe_attrs(h5_object, attrs): - for key, val in attrs.items(): - h5_object.attrs[key] = val - - @staticmethod - def __write_string_list_as_attr(h5_object, attrs): - for key, val in attrs.items(): - h5_object.attrs[key] = np.array(val, dtype='S') - - @staticmethod - def __write_aux_reg_ref(h5_dset, labels, is_spec=True): - for index, reg_ref_name in enumerate(labels): - if is_spec: - reg_ref_tuple = (slice(index, index + 1), slice(None)) - else: - reg_ref_tuple = (slice(None), slice(index, index + 1)) - h5_dset.attrs[reg_ref_name] = h5_dset.regionref[reg_ref_tuple] - - @staticmethod - def __write_main_reg_refs(h5_dset, attrs): - for reg_ref_name, reg_ref_tuple in attrs.items(): - h5_dset.attrs[reg_ref_name] = h5_dset.regionref[reg_ref_tuple] - TestRegRef.__write_string_list_as_attr(h5_dset, {'labels': list(attrs.keys())}) - def setUp(self): data_utils.make_beps_file() def tearDown(self): data_utils.delete_existing_file(data_utils.std_beps_path) - - def test_get_indices_for_region_ref_corners(self): - with h5py.File(data_utils.std_beps_path, mode='r') as h5_f: - h5_main = h5_f['/Raw_Measurement/source_main'] - ref_in = get_attr(h5_main, 'even_rows') - ret_val = reg_ref.get_indices_for_region_ref(h5_main, ref_in, 'corners') - expected_pos = np.repeat(np.arange(h5_main.shape[0])[::2], 2) - expected_spec = np.tile(np.array([0, h5_main.shape[1] - 1]), expected_pos.size // 2) - expected_corners = np.vstack((expected_pos, expected_spec)).T - self.assertTrue(np.allclose(ret_val, expected_corners)) - - def test_get_indices_for_region_ref_slices(self): - with h5py.File(data_utils.std_beps_path, mode='r') as h5_f: - h5_main = h5_f['/Raw_Measurement/source_main'] - ref_in = get_attr(h5_main, 'even_rows') - ret_val = reg_ref.get_indices_for_region_ref(h5_main, ref_in, 'slices') - spec_slice = slice(0, h5_main.shape[1] - 1, None) - expected_slices = np.array([[slice(x, x, None), spec_slice] for x in np.arange(h5_main.shape[0])[::2]]) - self.assertTrue(np.all(ret_val == expected_slices)) def test_copy_reg_ref_reduced_dim(self): # TODO: Fill this test in at earliest convenience. 
Overriden temporarily assert True - def test_write_reg_ref_main_one_dim(self): - file_path = 'test.h5' - data_utils.delete_existing_file(file_path) - data = np.random.rand(7) - with h5py.File(file_path, mode='w') as h5_f: - h5_dset = h5_f.create_dataset('Main', data=data) - reg_refs = {'even_rows': (slice(0, None, 2)), - 'odd_rows': (slice(1, None, 2))} - reg_ref.write_region_references(h5_dset, reg_refs, add_labels_attr=True) - self.assertEqual(len(h5_dset.attrs), 1 + len(reg_refs)) - actual = get_attr(h5_dset, 'labels') - self.assertTrue(np.all([x == y for x, y in zip(actual, ['even_rows', 'odd_rows'])])) - - expected_data = [data[0:None:2], data[1:None:2]] - written_data = [h5_dset[h5_dset.attrs['even_rows']], h5_dset[h5_dset.attrs['odd_rows']]] - - for exp, act in zip(expected_data, written_data): - self.assertTrue(np.allclose(exp, act)) - - os.remove(file_path) - - def test_write_reg_ref_main_1st_dim(self): - file_path = 'test.h5' - data_utils.delete_existing_file(file_path) - data = np.random.rand(5, 7) - with h5py.File(file_path, mode='w') as h5_f: - h5_dset = h5_f.create_dataset('Main', data=data) - reg_refs = {'even_rows': (slice(0, None, 2), slice(None)), - 'odd_rows': (slice(1, None, 2), slice(None))} - reg_ref.write_region_references(h5_dset, reg_refs, add_labels_attr=True) - self.assertEqual(len(h5_dset.attrs), 1 + len(reg_refs)) - actual = get_attr(h5_dset, 'labels') - self.assertTrue(np.all([x == y for x, y in zip(actual, ['even_rows', 'odd_rows'])])) - - expected_data = [data[0:None:2], data[1:None:2]] - written_data = [h5_dset[h5_dset.attrs['even_rows']], h5_dset[h5_dset.attrs['odd_rows']]] - - for exp, act in zip(expected_data, written_data): - self.assertTrue(np.allclose(exp, act)) - - os.remove(file_path) - - def test_write_reg_ref_main_2nd_dim(self): - file_path = 'test.h5' - data_utils.delete_existing_file(file_path) - data = np.random.rand(5, 7) - with h5py.File(file_path, mode='w') as h5_f: - h5_dset = h5_f.create_dataset('Main', data=data) - reg_refs = {'even_rows': (slice(None), slice(0, None, 2)), - 'odd_rows': (slice(None), slice(1, None, 2))} - reg_ref.write_region_references(h5_dset, reg_refs, add_labels_attr=False) - self.assertEqual(len(h5_dset.attrs), len(reg_refs)) - self.assertTrue('labels' not in h5_dset.attrs.keys()) - - expected_data = [data[:, 0:None:2], data[:, 1:None:2]] - written_data = [h5_dset[h5_dset.attrs['even_rows']], h5_dset[h5_dset.attrs['odd_rows']]] - - for exp, act in zip(expected_data, written_data): - self.assertTrue(np.allclose(exp, act)) - - os.remove(file_path) - - def test_simple_region_ref_copy(self): - # based on test_hdf_writer.test_write_legal_reg_ref_multi_dim_data() - file_path = 'test.h5' - data_utils.delete_existing_file(file_path) - with h5py.File(file_path, mode='w') as h5_f: - data = np.random.rand(5, 7) - h5_orig_dset = h5_f.create_dataset('test', data=data) - self.assertIsInstance(h5_orig_dset, h5py.Dataset) - - attrs = {'labels': {'even_rows': (slice(0, None, 2), slice(None)), - 'odd_rows': (slice(1, None, 2), slice(None))}} - - data_utils.write_main_reg_refs(h5_orig_dset, attrs['labels']) - h5_f.flush() - - # two atts point to region references. 
one for labels - self.assertEqual(len(h5_orig_dset.attrs), 1 + len(attrs['labels'])) - - # check if the labels attribute was written: - - self.assertTrue(np.all([x in list(attrs['labels'].keys()) for x in get_attr(h5_orig_dset, - 'labels')])) - - expected_data = [data[:None:2], data[1:None:2]] - written_data = [h5_orig_dset[h5_orig_dset.attrs['even_rows']], h5_orig_dset[h5_orig_dset.attrs['odd_rows']]] - - for exp, act in zip(expected_data, written_data): - self.assertTrue(np.allclose(exp, act)) - - # Now write a new dataset without the region reference: - h5_new_dset = h5_f.create_dataset('other', data=data) - self.assertIsInstance(h5_orig_dset, h5py.Dataset) - h5_f.flush() - - for key in attrs['labels'].keys(): - reg_ref.simple_region_ref_copy(h5_orig_dset, h5_new_dset, key) - - # now check to make sure that this dataset also has the same region references: - written_data = [h5_new_dset[h5_new_dset.attrs['even_rows']], h5_new_dset[h5_new_dset.attrs['odd_rows']]] - - for exp, act in zip(expected_data, written_data): - self.assertTrue(np.allclose(exp, act)) - - os.remove(file_path) - - def test_create_region_ref(self): - file_path = 'test.h5' - data_utils.delete_existing_file(file_path) - data = np.random.rand(5, 7) - with h5py.File(file_path, mode='w') as h5_f: - h5_dset = h5_f.create_dataset('Source', data=data) - pos_inds = np.arange(0, h5_dset.shape[0], 2) - ref_inds = [((pos_start, 0), (pos_start, h5_dset.shape[1]-1)) for pos_start in pos_inds] - ref_inds = np.array(ref_inds) - this_reg_ref = reg_ref.create_region_reference(h5_dset, ref_inds) - ref_slices = list() - for start, stop in ref_inds: - ref_slices.append([slice(start[0], stop[0]+1), slice(start[1], None)]) - - h5_reg = h5_dset[this_reg_ref] - - h5_slice = np.vstack([h5_dset[pos_slice, spec_slice] for (pos_slice, spec_slice) in ref_slices]) - - self.assertTrue(np.allclose(h5_reg, h5_slice)) - - os.remove(file_path) - - def test_get_region_illegal_01(self): - with h5py.File(data_utils.std_beps_path, mode='r') as h5_f: - with self.assertRaises(KeyError): - reg_ref.get_region(h5_f['/Raw_Measurement/source_main'], 'non_existent') - - def test_get_region_legal_01(self): - with h5py.File(data_utils.std_beps_path, mode='r') as h5_f: - h5_source = h5_f['/Raw_Measurement/source_main'] - returned = reg_ref.get_region(h5_source, 'even_rows') - self.assertTrue(np.all(returned == h5_source[range(0, h5_source.shape[0], 2)])) - - def test_clean_reg_refs_1d(self): - file_path = 'test.h5' - data_utils.delete_existing_file(file_path) - with h5py.File(file_path, mode='w') as h5_f: - h5_dset = h5_f.create_dataset('Test', data=np.random.rand(7)) - ref_in = (slice(0, None, 2)) - cleaned = reg_ref.clean_reg_ref(h5_dset, ref_in) - self.assertEqual(ref_in, cleaned[0]) - os.remove(file_path) - - def test_clean_reg_refs_2d(self): - file_path = 'test.h5' - data_utils.delete_existing_file(file_path) - with h5py.File(file_path, mode='w') as h5_f: - h5_dset = h5_f.create_dataset('Test', data=np.random.rand(7, 5)) - ref_in = (slice(0, None, 2), slice(None)) - cleaned = reg_ref.clean_reg_ref(h5_dset, ref_in) - self.assertTrue(np.all([x == y for x, y in zip(ref_in, cleaned)])) - os.remove(file_path) - - def test_clean_reg_refs_illegal_too_many_slices(self): - file_path = 'test.h5' - data_utils.delete_existing_file(file_path) - with h5py.File(file_path, mode='w') as h5_f: - h5_dset = h5_f.create_dataset('Test', data=np.random.rand(7, 5)) - ref_in = (slice(0, None, 2), slice(None), slice(1, None, 2)) - with self.assertRaises(ValueError): - _ = 
reg_ref.clean_reg_ref(h5_dset, ref_in) - - os.remove(file_path) - - def test_clean_reg_refs_illegal_too_few_slices(self): - file_path = 'test.h5' - data_utils.delete_existing_file(file_path) - with h5py.File(file_path, mode='w') as h5_f: - h5_dset = h5_f.create_dataset('Test', data=np.random.rand(7, 5)) - ref_in = (slice(0, None, 2)) - with self.assertRaises(ValueError): - _ = reg_ref.clean_reg_ref(h5_dset, ref_in) - - os.remove(file_path) - - def test_clean_reg_refs_out_of_bounds(self): - file_path = 'test.h5' - data_utils.delete_existing_file(file_path) - with h5py.File(file_path, mode='w') as h5_f: - h5_dset = h5_f.create_dataset('Test', data=np.random.rand(7, 5)) - ref_in = (slice(0, 13, 2), slice(None)) - expected = (slice(0, 7, 2), slice(None)) - cleaned = reg_ref.clean_reg_ref(h5_dset, ref_in, verbose=False) - self.assertTrue(np.all([x == y for x, y in zip(expected, cleaned)])) - os.remove(file_path) - - def test_attempt_reg_ref_build_spec(self): - file_path = 'test.h5' - data_utils.delete_existing_file(file_path) - with h5py.File(file_path, mode='w') as h5_f: - h5_dset = h5_f.create_dataset('Indices', data=np.random.rand(2, 5)) - dim_names = ['Bias', 'Cycle'] - expected = {'Bias': (slice(0, 1), slice(None)), - 'Cycle': (slice(1, 2), slice(None))} - if sys.version_info.major == 3: - with self.assertWarns(UserWarning): - cleaned = reg_ref.attempt_reg_ref_build(h5_dset, dim_names) - else: - cleaned = reg_ref.attempt_reg_ref_build(h5_dset, dim_names) - for key, value in expected.items(): - self.assertEqual(value, cleaned[key]) - os.remove(file_path) - - def test_attempt_reg_ref_build_pos(self): - file_path = 'test.h5' - data_utils.delete_existing_file(file_path) - with h5py.File(file_path, mode='w') as h5_f: - h5_dset = h5_f.create_dataset('Indices', data=np.random.rand(5, 2)) - dim_names = ['Bias', 'Cycle'] - expected = {'Bias': (slice(None), slice(0, 1)), - 'Cycle': (slice(None), slice(1, 2))} - if sys.version_info.major == 3: - with self.assertWarns(UserWarning): - cleaned = reg_ref.attempt_reg_ref_build(h5_dset, dim_names) - else: - cleaned = reg_ref.attempt_reg_ref_build(h5_dset, dim_names) - for key, value in expected.items(): - self.assertEqual(value, cleaned[key]) - os.remove(file_path) - - def test_attempt_reg_ref_build_pos_too_many_dims(self): - file_path = 'test.h5' - data_utils.delete_existing_file(file_path) - with h5py.File(file_path, mode='w') as h5_f: - h5_dset = h5_f.create_dataset('Indices', data=np.random.rand(5, 2)) - dim_names = ['Bias', 'Cycle', 'Blah'] - ret_val = reg_ref.attempt_reg_ref_build(h5_dset, dim_names) - self.assertEqual(ret_val, dict()) - os.remove(file_path) - - def test_attempt_reg_ref_build_pos_too_few_dims(self): - file_path = 'test.h5' - data_utils.delete_existing_file(file_path) - with h5py.File(file_path, mode='w') as h5_f: - h5_dset = h5_f.create_dataset('Indices', data=np.random.rand(5, 2)) - dim_names = ['Bias'] - ret_val = reg_ref.attempt_reg_ref_build(h5_dset, dim_names) - self.assertEqual(ret_val, dict()) - os.remove(file_path) - - def test_copy_region_refs(self): - file_path = 'test.h5' - data_utils.delete_existing_file(file_path) - data = np.random.rand(11, 7) - with h5py.File(file_path, mode='w') as h5_f: - h5_dset_source = h5_f.create_dataset('Source', data=data) - h5_dset_dest = h5_f.create_dataset('Target', data=data) - source_ref = h5_dset_source.regionref[0:-1:2] - h5_dset_source.attrs['regref'] = source_ref - - reg_ref.copy_region_refs(h5_dset_source, h5_dset_dest) - - self.assertTrue( - 
np.allclose(h5_dset_source[h5_dset_source.attrs['regref']], - h5_dset_dest[h5_dset_dest.attrs['regref']])) - os.remove(file_path) +if __name__ == '__main__': + unittest.main() From 0060abd08b96baaf72285fa24d39562b013c1761 Mon Sep 17 00:00:00 2001 From: ssomnath <14300780+ssomnath@users.noreply.github.com> Date: Thu, 5 Nov 2020 16:30:18 -0500 Subject: [PATCH 5/5] Raveling the pos and spec slices to 1D --- pyUSID/io/usi_data.py | 4 ++-- tests/io/test_usi_dataset.py | 3 ++- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/pyUSID/io/usi_data.py b/pyUSID/io/usi_data.py index fbc2cc2c..3b10e5f6 100644 --- a/pyUSID/io/usi_data.py +++ b/pyUSID/io/usi_data.py @@ -488,8 +488,8 @@ def slice(self, slice_dict, ndim_form=True, as_scalar=False, verbose=False, lazy if verbose: print('data_slice of shape: {} after squeezing'.format(data_slice.shape)) - pos_inds = self.h5_pos_inds[pos_slice, :] - spec_inds = self.h5_spec_inds[:, spec_slice].reshape([self.h5_spec_inds.shape[0], -1]) + pos_inds = self.h5_pos_inds[pos_slice.ravel(), :] + spec_inds = self.h5_spec_inds[:, spec_slice.ravel()].reshape([self.h5_spec_inds.shape[0], -1]) if verbose: print('Sliced position indices:') print(pos_inds) diff --git a/tests/io/test_usi_dataset.py b/tests/io/test_usi_dataset.py index b0d10103..1afe5948 100644 --- a/tests/io/test_usi_dataset.py +++ b/tests/io/test_usi_dataset.py @@ -339,7 +339,8 @@ def base(self, slice_dict, f2s_slice_list, result_as_nd, lazy_result, usi_main = USIDataset(h5_f['/Raw_Measurement/source_main']) actual, success = usi_main.slice(slice_dict, ndim_form=result_as_nd, - lazy=lazy_result) + lazy=lazy_result, + verbose=verbose) if verbose: print('Status: {}, actual.shape: {}, actual.dtype: {}, ' 'type(actual): {}'.format(success, actual.shape,
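
Usage notes with illustrative snippets (not part of the diffs above):

Patches 1 and 2 loosen the input checking in make_indices_matrix. The sketch
below mirrors the new unit tests: a bare int is wrapped into a one-dimension
list, a lone unit-sized dimension no longer trips the min_val check (that
check evaluates to min_val=2 for any non-empty input, hence the [1] special
case), and empty inputs are rejected with a dedicated ValueError. Assumes a
pyUSID build with both patches applied.

    import numpy as np
    from pyUSID.io import write_utils

    # A bare int is now wrapped into a single-dimension list internally
    inds = write_utils.make_indices_matrix(4, is_position=False)
    assert np.allclose(inds, np.expand_dims(np.arange(4), axis=0))  # (1, 4)

    # A single unit-sized dimension is special-cased instead of raising
    assert np.allclose(write_utils.make_indices_matrix([1], is_position=False),
                       [[0]])

    # Empty inputs fail fast with an explicit message
    try:
        write_utils.make_indices_matrix([])
    except ValueError:
        pass  # 'num_steps should not be an empty array or list'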
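Patches 3 and 4 delete the local region-reference helpers in favor of the
wildcard import from sidpy.hdf.reg_ref, so the deleted names should keep
resolving through pyUSID.io.reg_ref. A minimal check, assuming sidpy exports
the same helper names the removed code defined (which the
'from sidpy.hdf.reg_ref import *' line implies); only copy_region_refs
remains defined in pyUSID itself:

    import sidpy.hdf.reg_ref as sidpy_rr
    import pyUSID.io.reg_ref as usid_rr

    # The wildcard import binds the very same function objects, so legacy
    # call sites that import from pyUSID see the sidpy implementation:
    assert usid_rr.simple_region_ref_copy is sidpy_rr.simple_region_ref_copy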
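The ravel() calls in patch 5 matter because the pos_slice and spec_slice
arrays evidently arrive as 2-D column vectors, and fancy-indexing with a 2-D
array inserts an extra axis in numpy, while h5py refuses multi-dimensional
index arrays outright. A pure-numpy illustration of the shape problem being
sidestepped, with hypothetical values standing in for the HDF5 datasets:

    import numpy as np

    pos_slice = np.array([[0], [2], [4]])       # (3, 1) column of row indices
    h5_pos_inds = np.arange(50).reshape(10, 5)  # stand-in for the real dataset

    # 2-D fancy index: numpy returns shape (3, 1, 5); h5py would raise
    print(h5_pos_inds[pos_slice, :].shape)

    # Raveled to 1-D, the selection has the intended (rows, dims) shape
    print(h5_pos_inds[pos_slice.ravel(), :].shape)   # (3, 5)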