Skip to content

Commit d02fe5a

Browse files
authored
Merge pull request #250 from boutproject/runid-sort
Use run_id to sort files
2 parents 86dbb32 + 5c82b92 commit d02fe5a

File tree

1 file changed

+72
-14
lines changed

1 file changed

+72
-14
lines changed

xbout/load.py

Lines changed: 72 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -900,21 +900,79 @@ def _arrange_for_concatenation(filepaths, nxpe=1, nype=1):
900900

901901
nprocs = nxpe * nype
902902
n_runs = int(len(filepaths) / nprocs)
903-
if len(filepaths) % nprocs != 0:
904-
raise ValueError(
905-
"Each run directory does not contain an equal number "
906-
"of output files. If the parallelization scheme of "
907-
"your simulation changed partway-through, then please "
908-
"load each directory separately and concatenate them "
909-
"along the time dimension with xarray.concat()."
910-
)
903+
runids = []
904+
905+
def getrunid(fp):
906+
if _is_path(fp):
907+
try:
908+
with xr.open_dataset(fp) as tmp:
909+
return tmp.get("run_id", None)
910+
except FileNotFoundError:
911+
return None
912+
return fp.get("run_id", None)
913+
914+
for fp in filepaths:
915+
thisrunid = getrunid(fp)
916+
if thisrunid is None:
917+
runids = None
918+
break
919+
runids.append(thisrunid)
920+
if not runids:
921+
if len(filepaths) < nprocs:
922+
if len(filepaths) == 1:
923+
raise ValueError(
924+
"A parallel simulation was loaded, but only a single "
925+
"file was loaded. Please ensure to pass in all files "
926+
"by specifying e.g. `BOUT.dmp.*.nc` rather than "
927+
"`BOUT.dmp.0.nc`."
928+
)
929+
raise ValueError(
930+
f"A parallel simulation was loaded, but only {len(filepaths)} "
931+
"files were loaded. Please ensure to pass in all files "
932+
"by specifying e.g. `BOUT.dmp.*.nc`"
933+
)
934+
if len(filepaths) % nprocs != 0:
935+
raise ValueError(
936+
"Each run directory does not contain an equal number "
937+
"of output files. If the parallelization scheme of "
938+
"your simulation changed partway-through, then please "
939+
"load each directory separately and concatenate them "
940+
"along the time dimension with xarray.concat()."
941+
)
942+
# Create list of lists of filepaths, so that xarray knows how they should
943+
# be concatenated by xarray.open_mfdataset()
944+
paths = iter(filepaths)
945+
paths_grid = [
946+
[[next(paths) for x in range(nxpe)] for y in range(nype)]
947+
for t in range(n_runs)
948+
]
949+
950+
else:
951+
paths_sorted = []
952+
lastid = None
953+
for path, gid in zip(filepaths, runids):
954+
if lastid != gid:
955+
lastid = gid
956+
paths_sorted.append([])
957+
paths_sorted[-1].append(path)
958+
paths_grid = []
959+
for paths in paths_sorted:
960+
if len(paths) != nprocs:
961+
with xr.open_dataset(paths[0]) as tmp:
962+
if tmp["PE_XIND"] != 0 or tmp["PE_YIND"] != 0:
963+
# The first file is missing.
964+
warn(
965+
f"Ignoring {len(paths)} files as the first seems to be missing: {paths}"
966+
)
967+
continue
968+
assert tmp["NXPE"] == nxpe
969+
assert tmp["NYPE"] == nype
970+
raise ValueError(
971+
f"Something is wrong. We expected {nprocs} files but found {len(paths)} files."
972+
)
973+
paths = iter(paths)
911974

912-
# Create list of lists of filepaths, so that xarray knows how they should
913-
# be concatenated by xarray.open_mfdataset()
914-
paths = iter(filepaths)
915-
paths_grid = [
916-
[[next(paths) for x in range(nxpe)] for y in range(nype)] for t in range(n_runs)
917-
]
975+
paths_grid.append([[next(paths) for x in range(nxpe)] for y in range(nype)])
918976

919977
# Dimensions along which no concatenation is needed are still present as
920978
# single-element lists, so need to concatenation along dim=None for those

0 commit comments

Comments
 (0)