@@ -900,21 +900,79 @@ def _arrange_for_concatenation(filepaths, nxpe=1, nype=1):
 
     nprocs = nxpe * nype
     n_runs = int(len(filepaths) / nprocs)
-    if len(filepaths) % nprocs != 0:
-        raise ValueError(
-            "Each run directory does not contain an equal number "
-            "of output files. If the parallelization scheme of "
-            "your simulation changed partway-through, then please "
-            "load each directory separately and concatenate them "
-            "along the time dimension with xarray.concat()."
-        )
+    runids = []
+
+    def getrunid(fp):
+        if _is_path(fp):
+            try:
+                with xr.open_dataset(fp) as tmp:
+                    return tmp.get("run_id", None)
+            except FileNotFoundError:
+                return None
+        return fp.get("run_id", None)
+
+    for fp in filepaths:
+        thisrunid = getrunid(fp)
+        if thisrunid is None:
+            runids = None
+            break
+        runids.append(thisrunid)
+    if not runids:
+        if len(filepaths) < nprocs:
+            if len(filepaths) == 1:
+                raise ValueError(
+                    "A parallel simulation was loaded, but only a single "
+                    "file was passed. Please make sure to pass in all files "
+                    "by specifying e.g. `BOUT.dmp.*.nc` rather than "
+                    "`BOUT.dmp.0.nc`."
+                )
+            raise ValueError(
+                f"A parallel simulation was loaded, but only {len(filepaths)} "
+                "files were passed. Please make sure to pass in all files "
+                "by specifying e.g. `BOUT.dmp.*.nc`."
+            )
+        if len(filepaths) % nprocs != 0:
+            raise ValueError(
+                "Each run directory does not contain an equal number "
+                "of output files. If the parallelization scheme of "
+                "your simulation changed partway-through, then please "
+                "load each directory separately and concatenate them "
+                "along the time dimension with xarray.concat()."
+            )
+        # Create list of lists of filepaths, so that xarray knows how they should
+        # be concatenated by xarray.open_mfdataset()
+        paths = iter(filepaths)
+        paths_grid = [
+            [[next(paths) for x in range(nxpe)] for y in range(nype)]
+            for t in range(n_runs)
+        ]
+
+    else:
+        paths_sorted = []
+        lastid = None
+        for path, gid in zip(filepaths, runids):
+            if lastid != gid:
+                lastid = gid
+                paths_sorted.append([])
+            paths_sorted[-1].append(path)
+        paths_grid = []
+        for paths in paths_sorted:
+            if len(paths) != nprocs:
+                with xr.open_dataset(paths[0]) as tmp:
+                    if tmp["PE_XIND"] != 0 or tmp["PE_YIND"] != 0:
+                        # The first file is missing.
+                        warn(
+                            f"Ignoring {len(paths)} files as the first seems to be missing: {paths}"
+                        )
+                        continue
+                    assert tmp["NXPE"] == nxpe
+                    assert tmp["NYPE"] == nype
+                    raise ValueError(
+                        f"Something is wrong. We expected {nprocs} files but found {len(paths)} files."
+                    )
+            paths = iter(paths)
 
-    # Create list of lists of filepaths, so that xarray knows how they should
-    # be concatenated by xarray.open_mfdataset()
-    paths = iter(filepaths)
-    paths_grid = [
-        [[next(paths) for x in range(nxpe)] for y in range(nype)] for t in range(n_runs)
-    ]
+            paths_grid.append([[next(paths) for x in range(nxpe)] for y in range(nype)])
 
     # Dimensions along which no concatenation is needed are still present as
     # single-element lists, so we need to concatenate along dim=None for those
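The core of the new branch is the grouping of files by their "run_id" attribute. Below is a minimal, self-contained sketch of that grouping, not code from this commit: consecutive files that report the same run_id form one run, and each run is reshaped into the [y][x] nesting that xarray.open_mfdataset expects. The helper name arrange_by_runid, the file names, and the run ids are illustrative only, and each run is assumed to be complete (nxpe * nype files).

from itertools import groupby

def arrange_by_runid(filepaths, runids, nxpe, nype):
    """Group consecutive files sharing a run_id, then nest each group
    as [y][x] blocks of nype rows and nxpe columns (one block per run)."""
    paths_grid = []
    for _, group in groupby(zip(filepaths, runids), key=lambda pair: pair[1]):
        paths = iter(p for p, _ in group)
        paths_grid.append(
            [[next(paths) for _x in range(nxpe)] for _y in range(nype)]
        )
    return paths_grid

# Two restarts of a 2-processor (nxpe=2, nype=1) simulation:
files = ["run0/BOUT.dmp.0.nc", "run0/BOUT.dmp.1.nc",
         "run1/BOUT.dmp.0.nc", "run1/BOUT.dmp.1.nc"]
ids = ["aaaa", "aaaa", "bbbb", "bbbb"]
print(arrange_by_runid(files, ids, nxpe=2, nype=1))
# [[['run0/BOUT.dmp.0.nc', 'run0/BOUT.dmp.1.nc']],
#  [['run1/BOUT.dmp.0.nc', 'run1/BOUT.dmp.1.nc']]]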
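Grouping by run_id also relaxes the old requirement that the total number of files be an exact multiple of nxpe * nype: when run ids are available, files are validated per run instead, a run whose first processor file (PE_XIND == 0, PE_YIND == 0) is missing is skipped with a warning, and any other size mismatch still raises. The divisibility check is kept only for the fallback path where run_id is absent. In either case the nested result keeps single-element lists for levels that need no concatenation, which is what the trailing comment refers to: those levels are later combined with dim=None, i.e. left unconcatenated, when the grid is handed to xarray.open_mfdataset.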