Skip to content

Commit

Permalink
Enable filtering of available stations by name
Browse files Browse the repository at this point in the history
  • Loading branch information
ghiggi committed Apr 5, 2024
1 parent ac24646 commit c712cb2
Show file tree
Hide file tree
Showing 2 changed files with 26 additions and 57 deletions.
21 changes: 12 additions & 9 deletions disdrodb/api/io.py
Original file line number Diff line number Diff line change
Expand Up @@ -192,7 +192,7 @@ def _check_data_sources(base_dir, product, data_sources):
idx_invalid = np.where(np.isin(data_sources, list_dir, invert=True))[0]
if len(idx_invalid) > 0:
invalid_data_sources = data_sources[idx_invalid].tolist()
raise ValueError(f"These data sources are invalid: {invalid_data_sources}.")
raise ValueError(f"These data sources does not exist: {invalid_data_sources}.")
# Return data_sources list
data_sources = data_sources.tolist()
return data_sources
Expand Down Expand Up @@ -223,7 +223,7 @@ def _check_campaign_names(base_dir, product, campaign_names):
idx_invalid = np.where(np.isin(campaign_names, list_campaign_names, invert=True))[0]
if len(idx_invalid) > 0:
invalid_campaign_names = campaign_names[idx_invalid].tolist()
raise ValueError(f"These campaign names are invalid: {invalid_campaign_names}.")
raise ValueError(f"These campaign names does not exist: {invalid_campaign_names}.")
# Return campaign_names list
campaign_names = campaign_names.tolist()
return campaign_names
Expand Down Expand Up @@ -276,6 +276,7 @@ def available_stations(
product,
data_sources=None,
campaign_names=None,
station_names=None,
return_tuple=True,
base_dir=None,
):
Expand All @@ -293,6 +294,8 @@ def available_stations(
product=product,
campaign_names=campaign_names,
)
if isinstance(station_names, str):
station_names = [station_names]

# If data_sources is None, first retrieve all stations
if data_sources is None:
Expand All @@ -304,18 +307,18 @@ def available_stations(
data_sources=data_sources,
product=product,
)
# Then, if campaign_name is not None, subset by campaign_name
# If campaign_names is not None, subset by campaign_names
if campaign_names is not None:
list_info = [info for info in list_info if info[1] in campaign_names]

# If station_names is not None, subset by station_names
if station_names is not None:
list_info = [info for info in list_info if info[2] in station_names]

# Return list with the tuple (data_source, campaign_name, station_name)
if return_tuple:
return list_info

# TODO:
# - Filter by station names !
# - Add check_station_names
# - Simplify available_stations

# TODO: ENSURE THAT NO DUPLICATED STATION NAMES ?
# - Return list with the name of the available stations
list_stations = [info[2] for info in list_info]
return list_stations
62 changes: 14 additions & 48 deletions disdrodb/metadata/search.py
Original file line number Diff line number Diff line change
Expand Up @@ -178,58 +178,24 @@ def _get_list_metadata_with_data(base_dir, data_sources=None, campaign_names=Non
product="RAW",
data_sources=data_sources,
campaign_names=campaign_names,
station_names=station_names,
)

# If no stations available, raise an error
if len(list_info) == 0:
raise ValueError("No stations are available !")

if isinstance(station_names, str):
station_names = [station_names]

### This solution is slow !
# if isinstance(station_names, type(None)):
# station_names = [station_name for _, _, station_name in list_info]
# metadata_filepaths = [
# define_metadata_filepath(
# product="Raw",
# data_source=data_source,
# campaign_name=campaign_name,
# station_name=station_name,
# base_dir=base_dir,
# check_exists=False,
# )
# for data_source, campaign_name, station_name in list_info if station_name in station_names
# ]

# Get metadata filepaths
if isinstance(station_names, list):
metadata_filepaths = [
define_metadata_filepath(
product="Raw",
data_source=data_source,
campaign_name=campaign_name,
station_name=station_name,
base_dir=base_dir,
check_exists=False,
)
for data_source, campaign_name, station_name in list_info
if station_name in station_names
]
else:
metadata_filepaths = [
define_metadata_filepath(
product="Raw",
data_source=data_source,
campaign_name=campaign_name,
station_name=station_name,
base_dir=base_dir,
check_exists=False,
)
for data_source, campaign_name, station_name in list_info
]
# Retrieve metadata filepaths
metadata_filepaths = [
define_metadata_filepath(
product="RAW",
data_source=data_source,
campaign_name=campaign_name,
station_name=station_name,
base_dir=base_dir,
check_exists=False,
)
for data_source, campaign_name, station_name in list_info
]

# If no stations available, raise an error
if len(metadata_filepaths) == 0:
raise ValueError("No stations are available !")

return sorted(metadata_filepaths)

0 comments on commit c712cb2

Please sign in to comment.