@@ -140,6 +140,9 @@ def __init__(self, stream_resource: StreamResource, descriptor: EventDescriptor)
140140 self ._num_rows : int = 0 # Number of rows in the Data Source (all rows, including skips)
141141 self ._seqnums_to_indices_map : dict [int , int ] = {}
142142
143+ # Set the dimension names if provided
144+ self .dims : tuple [str , ...] = tuple (data_desc .get ("dims" , ()))
145+
143146 @classmethod
144147 def get_supported_mimetype (cls , sres ):
145148 if sres ["mimetype" ] not in cls .supported_mimetypes :
@@ -241,6 +244,7 @@ def structure(self) -> ArrayStructure:
241244 data_type = self .data_type ,
242245 shape = self .shape ,
243246 chunks = self .chunks ,
247+ dims = self .dims if self .dims else None ,
244248 )
245249
246250 def consume_stream_datum (self , doc : StreamDatum ):
@@ -300,22 +304,24 @@ def update_from_stream_resource(self, stream_resource: StreamResource):
300304
301305 raise NotImplementedError ("This method is not implemented in the base Consolidator class." )
302306
303- def validate (self , adapters_by_mimetype = None , fix_errors = False ):
307+ def validate (self , adapters_by_mimetype = None , fix_errors = False ) -> list [ str ] :
304308 """Validate the Consolidator's state against the expected structure"""
305309
306310 # User-provided adapters take precedence over defaults.
307311 all_adapters_by_mimetype = collections .ChainMap ((adapters_by_mimetype or {}), DEFAULT_ADAPTERS_BY_MIMETYPE )
308312 adapter_class = all_adapters_by_mimetype [self .mimetype ]
309313
310314 # Initialize adapter from uris and determine the structure
311- uris = [asset .data_uri for asset in self .assets if asset . parameter == "data_uris" ]
315+ uris = [asset .data_uri for asset in self .assets ]
312316 structure = adapter_class .from_uris (* uris , ** self .adapter_parameters ()).structure ()
317+ notes = []
313318
314319 if self .shape != structure .shape :
315320 if not fix_errors :
316321 raise ValueError (f"Shape mismatch: { self .shape } != { structure .shape } " )
317322 else :
318- warnings .warn (f"Fixing shape mismatch: { self .shape } -> { structure .shape } " , stacklevel = 2 )
323+ msg = f"Fixed shape mismatch: { self .shape } -> { structure .shape } "
324+ warnings .warn (msg , stacklevel = 2 )
319325 if self .join_method == "stack" :
320326 self ._num_rows = structure .shape [0 ]
321327 self .datum_shape = structure .shape [1 :]
@@ -324,26 +330,53 @@ def validate(self, adapters_by_mimetype=None, fix_errors=False):
324330 multiplier = 1 if structure .shape [0 ] % structure .chunks [0 ][0 ] else structure .chunks [0 ][0 ]
325331 self ._num_rows = structure .shape [0 ] // multiplier
326332 self .datum_shape = (multiplier ,) + structure .shape [1 :]
333+ notes .append (msg )
327334
328335 if self .chunks != structure .chunks :
329336 if not fix_errors :
330337 raise ValueError (f"Chunk shape mismatch: { self .chunks } != { structure .chunks } " )
331338 else :
332339 _chunk_shape = tuple (c [0 ] for c in structure .chunks )
333- warnings .warn (f"Fixing chunk shape mismatch: { self .chunk_shape } -> { _chunk_shape } " , stacklevel = 2 )
340+ msg = f"Fixed chunk shape mismatch: { self .chunk_shape } -> { _chunk_shape } "
341+ warnings .warn (msg , stacklevel = 2 )
334342 self .chunk_shape = _chunk_shape
343+ notes .append (msg )
335344
336345 if self .data_type != structure .data_type :
337346 if not fix_errors :
338347 raise ValueError (f"dtype mismatch: { self .data_type } != { structure .data_type } " )
339348 else :
340- warnings . warn (
341- f"Fixing dtype mismatch: { self .data_type .to_numpy_dtype ()} -> { structure . data_type . to_numpy_dtype () } " , # noqa
342- stacklevel = 2 ,
349+ msg = (
350+ f"Fixed dtype mismatch: { self .data_type .to_numpy_dtype ()} "
351+ f"-> { structure . data_type . to_numpy_dtype () } "
343352 )
353+ warnings .warn (msg , stacklevel = 2 )
344354 self .data_type = structure .data_type
355+ notes .append (msg )
356+
357+ if self .dims and (len (self .dims ) != len (structure .shape )):
358+ if not fix_errors :
359+ raise ValueError (
360+ f"Number of dimension names mismatch for a "
361+ f"{ len (structure .shape )} -dimensional array: { self .dims } "
362+ )
363+ else :
364+ old_dims = self .dims
365+ if len (old_dims ) < len (structure .shape ):
366+ self .dims = (
367+ ("time" ,)
368+ + old_dims
369+ + tuple (f"dim{ i } " for i in range (len (old_dims ) + 1 , len (structure .shape )))
370+ )
371+ else :
372+ self .dims = old_dims [: len (structure .shape )]
373+ msg = f"Fixed dimension names: { old_dims } -> { self .dims } "
374+ warnings .warn (msg , stacklevel = 2 )
375+ notes .append (msg )
376+
377+ assert self .get_adapter () is not None , "Adapter can not be initialized"
345378
346- assert self . get_adapter () is not None , "Adapter can not not initialized"
379+ return notes
347380
348381
349382class CSVConsolidator (ConsolidatorBase ):
0 commit comments