From a4adb180373705a3ce26d7ea446c969e28f857dc Mon Sep 17 00:00:00 2001 From: ghiggi Date: Fri, 5 Apr 2024 13:42:49 +0200 Subject: [PATCH] Enable filter available stations by name --- disdrodb/api/io.py | 21 +++++++------ disdrodb/metadata/search.py | 62 +++++++++---------------------------- 2 files changed, 26 insertions(+), 57 deletions(-) diff --git a/disdrodb/api/io.py b/disdrodb/api/io.py index b0faa2e2..67b38242 100644 --- a/disdrodb/api/io.py +++ b/disdrodb/api/io.py @@ -192,7 +192,7 @@ def _check_data_sources(base_dir, product, data_sources): idx_invalid = np.where(np.isin(data_sources, list_dir, invert=True))[0] if len(idx_invalid) > 0: invalid_data_sources = data_sources[idx_invalid].tolist() - raise ValueError(f"These data sources are invalid: {invalid_data_sources}.") + raise ValueError(f"These data sources does not exist: {invalid_data_sources}.") # Return data_sources list data_sources = data_sources.tolist() return data_sources @@ -223,7 +223,7 @@ def _check_campaign_names(base_dir, product, campaign_names): idx_invalid = np.where(np.isin(campaign_names, list_campaign_names, invert=True))[0] if len(idx_invalid) > 0: invalid_campaign_names = campaign_names[idx_invalid].tolist() - raise ValueError(f"These campaign names are invalid: {invalid_campaign_names}.") + raise ValueError(f"These campaign names does not exist: {invalid_campaign_names}.") # Return campaign_names list campaign_names = campaign_names.tolist() return campaign_names @@ -276,6 +276,7 @@ def available_stations( product, data_sources=None, campaign_names=None, + station_names=None, return_tuple=True, base_dir=None, ): @@ -293,6 +294,8 @@ def available_stations( product=product, campaign_names=campaign_names, ) + if isinstance(station_names, str): + station_names = [station_names] # If data_source is None, first retrieve all stations if data_sources is None: @@ -304,18 +307,18 @@ def available_stations( data_sources=data_sources, product=product, ) - # Then, if campaign_name is not None, subset by campaign_name + # If campaign_names is not None, subset by campaign_names if campaign_names is not None: list_info = [info for info in list_info if info[1] in campaign_names] + # If station_names is not None, subset by station_names + if station_names is not None: + list_info = [info for info in list_info if info[2] in station_names] + + # Return list with the tuple (data_source, campaign_name, station_name) if return_tuple: return list_info - # TODO: - # - Filter by station names ! - # - Add check_station_names - # - Simplify available_stations - - # TODO: ENSURE THAT NO DUPLICATED STATION NAMES ? + # - Return list with the name of the available stations list_stations = [info[2] for info in list_info] return list_stations diff --git a/disdrodb/metadata/search.py b/disdrodb/metadata/search.py index 26edd936..a6040d31 100644 --- a/disdrodb/metadata/search.py +++ b/disdrodb/metadata/search.py @@ -178,58 +178,24 @@ def _get_list_metadata_with_data(base_dir, data_sources=None, campaign_names=Non product="RAW", data_sources=data_sources, campaign_names=campaign_names, + station_names=station_names, ) - # If no stations available, raise an error - if len(list_info) == 0: - raise ValueError("No stations are available !") - - if isinstance(station_names, str): - station_names = [station_names] - - ### This solution is slow ! - # if isinstance(station_names, type(None)): - # station_names = [station_name for _, _, station_name in list_info] - # metadata_filepaths = [ - # define_metadata_filepath( - # product="Raw", - # data_source=data_source, - # campaign_name=campaign_name, - # station_name=station_name, - # base_dir=base_dir, - # check_exists=False, - # ) - # for data_source, campaign_name, station_name in list_info if station_name in station_names - # ] - - # Get metadata filepaths - if isinstance(station_names, list): - metadata_filepaths = [ - define_metadata_filepath( - product="Raw", - data_source=data_source, - campaign_name=campaign_name, - station_name=station_name, - base_dir=base_dir, - check_exists=False, - ) - for data_source, campaign_name, station_name in list_info - if station_name in station_names - ] - else: - metadata_filepaths = [ - define_metadata_filepath( - product="Raw", - data_source=data_source, - campaign_name=campaign_name, - station_name=station_name, - base_dir=base_dir, - check_exists=False, - ) - for data_source, campaign_name, station_name in list_info - ] + # Retrieve metadata filepaths + metadata_filepaths = [ + define_metadata_filepath( + product="RAW", + data_source=data_source, + campaign_name=campaign_name, + station_name=station_name, + base_dir=base_dir, + check_exists=False, + ) + for data_source, campaign_name, station_name in list_info + ] # If no stations available, raise an error if len(metadata_filepaths) == 0: raise ValueError("No stations are available !") + return sorted(metadata_filepaths)