diff --git a/disdrodb/api/create_directories.py b/disdrodb/api/create_directories.py
index f8b961be..81a0be76 100644
--- a/disdrodb/api/create_directories.py
+++ b/disdrodb/api/create_directories.py
@@ -216,7 +216,6 @@ def create_l0_directory_structure(
     station_name,
     force,
     product,
-    verbose=False,
 ):
     """Create directory structure for the first L0 DISDRODB product.

diff --git a/disdrodb/issue/checks.py b/disdrodb/issue/checks.py
index d89065c9..a5bc68bf 100644
--- a/disdrodb/issue/checks.py
+++ b/disdrodb/issue/checks.py
@@ -82,7 +82,7 @@ def _check_timestep_datetime_accuracy(timesteps, unit="s"):
     return timesteps


-def _check_timestep_string_second_accuracy(timesteps, n=19):
+def _check_timestep_string_second_accuracy(timesteps):
     """Check the timesteps string are provided with second accuracy.

     Note: it assumes the YYYY-mm-dd HH:MM:SS format
diff --git a/disdrodb/l0/check_standards.py b/disdrodb/l0/check_standards.py
index 3c484b3b..7ff03503 100644
--- a/disdrodb/l0/check_standards.py
+++ b/disdrodb/l0/check_standards.py
@@ -56,7 +56,7 @@ def _check_valid_range(df, dict_data_range, verbose=False):

     if len(list_wrong_columns) > 0:
         msg = f"Columns {list_wrong_columns} has values outside the expected data range."
-        log_error(logger=logger, msg=msg, verbose=False)
+        log_error(logger=logger, msg=msg, verbose=verbose)
         raise ValueError(msg)


@@ -81,7 +81,7 @@ def _check_valid_values(df, dict_valid_values, verbose=False):

     if len(list_wrong_columns) > 0:
         msg = "\n".join(list_msg)
-        log_error(logger=logger, msg=msg, verbose=False)
+        log_error(logger=logger, msg=msg, verbose=verbose)
         raise ValueError(f"Columns {list_wrong_columns} have invalid values.")


@@ -109,7 +109,7 @@ def _check_raw_fields_available(df: pd.DataFrame, sensor_name: str, verbose: boo
     # Check that raw_drop_number is present
     if "raw_drop_number" not in df.columns:
         msg = "The 'raw_drop_number' column is not present in the dataframe."
-        log_error(logger=logger, msg=msg, verbose=False)
+        log_error(logger=logger, msg=msg, verbose=verbose)
         raise ValueError(msg)

     # Report additional raw arrays that are missing
@@ -218,4 +218,5 @@ def check_l0a_standards(df: pd.DataFrame, sensor_name: str, verbose: bool = True

 def check_l0b_standards(x: str) -> None:
     # - Check for realistic values after having removed the flags !!!!
+    x = "noqa"  # noqa F841
     pass
diff --git a/disdrodb/l0/l0_processing.py b/disdrodb/l0/l0_processing.py
index 56b11572..63823195 100644
--- a/disdrodb/l0/l0_processing.py
+++ b/disdrodb/l0/l0_processing.py
@@ -438,7 +438,6 @@ def run_l0a(
         product="L0A",
         station_name=station_name,
         force=force,
-        verbose=verbose,
     )

     # -------------------------------------------------------------------------.
@@ -726,7 +725,6 @@ def run_l0b_from_nc(
         product="L0B",
         station_name=station_name,
         force=force,
-        verbose=verbose,
     )

     # -------------------------------------------------------------------------.
diff --git a/disdrodb/l0/l0a_processing.py b/disdrodb/l0/l0a_processing.py
index 89f1af27..12348c18 100644
--- a/disdrodb/l0/l0a_processing.py
+++ b/disdrodb/l0/l0a_processing.py
@@ -138,7 +138,7 @@ def _check_df_sanitizer_fun(df_sanitizer_fun):
         raise ValueError("The `df_sanitizer_fun` must have only `df` as input argument!")


-def _check_not_empty_dataframe(df, verbose=False):
+def _check_not_empty_dataframe(df):
     if len(df.index) == 0:
         msg = " - The file is empty and has been skipped."
         log_error(logger=logger, msg=msg, verbose=False)
@@ -288,7 +288,7 @@ def remove_issue_timesteps(df, issue_dict, verbose=False):
     return df


-def cast_column_dtypes(df: pd.DataFrame, sensor_name: str, verbose: bool = False) -> pd.DataFrame:
+def cast_column_dtypes(df: pd.DataFrame, sensor_name: str) -> pd.DataFrame:
     """Convert ``'object'`` dataframe columns into DISDRODB L0A dtype standards.

     Parameters
@@ -297,8 +297,6 @@ def cast_column_dtypes(df: pd.DataFrame, sensor_name: str, verbose: bool = False
         Input dataframe.
     sensor_name : str
         Name of the sensor.
-    verbose : bool
-        Whether to verbose the processing. The default is ``False``.

     Returns
     -------
@@ -328,7 +326,7 @@ def cast_column_dtypes(df: pd.DataFrame, sensor_name: str, verbose: bool = False
     return df


-def coerce_corrupted_values_to_nan(df: pd.DataFrame, sensor_name: str, verbose: bool = False) -> pd.DataFrame:
+def coerce_corrupted_values_to_nan(df: pd.DataFrame, sensor_name: str) -> pd.DataFrame:
     """Coerce corrupted values in dataframe numeric columns to ``np.nan``.

     Parameters
@@ -337,8 +335,6 @@ def coerce_corrupted_values_to_nan(df: pd.DataFrame, sensor_name: str, verbose:
         Input dataframe.
     sensor_name : str
         Name of the sensor.
-    verbose : bool
-        Whether to verbose the processing. The default is ``False``.

     Returns
     -------
@@ -361,7 +357,7 @@ def coerce_corrupted_values_to_nan(df: pd.DataFrame, sensor_name: str, verbose:
     return df


-def strip_string_spaces(df: pd.DataFrame, sensor_name: str, verbose: bool = False) -> pd.DataFrame:
+def strip_string_spaces(df: pd.DataFrame, sensor_name: str) -> pd.DataFrame:
     """Strip leading/trailing spaces from dataframe string columns.

     Parameters
@@ -370,8 +366,6 @@ def strip_string_spaces(df: pd.DataFrame, sensor_name: str, verbose: bool = Fals
         Input dataframe.
     sensor_name : str
         Name of the sensor.
-    verbose : bool
-        Whether to verbose the processing. The default is ``False``.

     Returns
     -------
@@ -392,7 +386,7 @@ def strip_string_spaces(df: pd.DataFrame, sensor_name: str, verbose: bool = Fals
         try:
             df[column] = df[column].str.strip()
         except AttributeError:
-            msg = f"AttributeError: The column {column} is not a string/object dtype."
+            msg = f"The column {column} is not a string/object dtype."
             log_error(logger=logger, msg=msg, verbose=False)
             raise AttributeError(msg)
     return df
@@ -618,7 +612,7 @@ def process_raw_file(
     )

     # - Check if file empty
-    _check_not_empty_dataframe(df=df, verbose=verbose)
+    _check_not_empty_dataframe(df=df)

     # - Check dataframe column number matches columns_names
     _check_matching_column_number(df, column_names, verbose=False)
@@ -637,10 +631,10 @@ def process_raw_file(
     df = remove_issue_timesteps(df, issue_dict=issue_dict, verbose=verbose)

     # - Coerce numeric columns corrupted values to np.nan
-    df = coerce_corrupted_values_to_nan(df, sensor_name=sensor_name, verbose=verbose)
+    df = coerce_corrupted_values_to_nan(df, sensor_name=sensor_name)

     # - Strip trailing/leading space from string columns
-    df = strip_string_spaces(df, sensor_name=sensor_name, verbose=verbose)
+    df = strip_string_spaces(df, sensor_name=sensor_name)

     # - Strip first and last delimiter from the raw arrays
     df = strip_delimiter_from_raw_arrays(df)
@@ -649,7 +643,7 @@ def process_raw_file(
     df = remove_corrupted_rows(df)

     # - Cast dataframe to dtypes
-    df = cast_column_dtypes(df, sensor_name=sensor_name, verbose=verbose)
+    df = cast_column_dtypes(df, sensor_name=sensor_name)

     # - Replace nan flags values with np.nans
     df = replace_nan_flags(df, sensor_name=sensor_name, verbose=verbose)
@@ -729,7 +723,7 @@ def write_l0a(
             row_group_size=row_group_size,
         )
         msg = f"The Pandas Dataframe has been written as an Apache Parquet file to {filepath}."
-        log_info(logger=logger, msg=msg, verbose=False)
+        log_info(logger=logger, msg=msg, verbose=verbose)
     except Exception as e:
         msg = f" - The Pandas DataFrame cannot be written as an Apache Parquet file. The error is: \n {e}."
         log_error(logger=logger, msg=msg, verbose=False)
diff --git a/disdrodb/l0/l0b_processing.py b/disdrodb/l0/l0b_processing.py
index ff4f1ec4..cc6f0416 100644
--- a/disdrodb/l0/l0b_processing.py
+++ b/disdrodb/l0/l0b_processing.py
@@ -339,7 +339,7 @@ def _set_attrs_dict(ds, attrs_dict):

 def _set_coordinate_attributes(ds):
     # Get attributes dictionary
-    attrs_dict = get_coords_attrs_dict(ds)
+    attrs_dict = get_coords_attrs_dict()
     # Set attributes
     ds = _set_attrs_dict(ds, attrs_dict)
     return ds
diff --git a/disdrodb/l0/standards.py b/disdrodb/l0/standards.py
index dbd75651..e5952191 100644
--- a/disdrodb/l0/standards.py
+++ b/disdrodb/l0/standards.py
@@ -263,7 +263,7 @@ def get_l0b_cf_attrs_dict(sensor_name: str) -> dict:
 #### Coordinates attributes


-def get_coords_attrs_dict(ds):
+def get_coords_attrs_dict():
     """Return dictionary with DISDRODB coordinates attributes."""
     attrs_dict = {}
     # Define diameter attributes
diff --git a/disdrodb/metadata/search.py b/disdrodb/metadata/search.py
index 065772eb..0fd1677a 100644
--- a/disdrodb/metadata/search.py
+++ b/disdrodb/metadata/search.py
@@ -142,7 +142,7 @@ def _get_list_all_metadata(base_dir, data_sources=None, campaign_names=None, sta
     return list(set(metadata_filepaths))


-def _get_list_metadata_with_data(base_dir, data_sources=None, campaign_names=None, station_names=None):
+def _get_list_metadata_with_data(base_dir, data_sources=None, campaign_names=None):
     """
     Get the list of metadata filepaths that have corresponding data in the DISDRODB raw archive.

@@ -159,9 +159,6 @@ def _get_list_metadata_with_data(base_dir, data_sources=None, campaign_names=Non
         Name of the campaign(s) of interest.
         The name(s) must be UPPER CASE.
         The default is ``None``.
-    station_names : str or list of str
-        Station names of interest.
-        The default is ``None``.

     Returns
     -------
diff --git a/disdrodb/tests/test_l0/test_l0a_processing.py b/disdrodb/tests/test_l0/test_l0a_processing.py
index ac490d80..e6c48354 100644
--- a/disdrodb/tests/test_l0/test_l0a_processing.py
+++ b/disdrodb/tests/test_l0/test_l0a_processing.py
@@ -205,13 +205,13 @@ def test_strip_string_spaces(create_test_config_files):
 def test_coerce_corrupted_values_to_nan(create_test_config_files):
     # Test with a valid dataframe
     df = pd.DataFrame({"key_4": ["1"]})
-    df_out = coerce_corrupted_values_to_nan(df, sensor_name=TEST_SENSOR_NAME, verbose=False)
+    df_out = coerce_corrupted_values_to_nan(df, sensor_name=TEST_SENSOR_NAME)

     assert df.equals(df_out)

     # Test with a wrong dataframe
     df = pd.DataFrame({"key_4": ["text"]})
-    df_out = coerce_corrupted_values_to_nan(df, sensor_name=TEST_SENSOR_NAME, verbose=False)
+    df_out = coerce_corrupted_values_to_nan(df, sensor_name=TEST_SENSOR_NAME)

     assert pd.isnull(df_out["key_4"][0])
@@ -343,7 +343,7 @@ def test_cast_column_dtypes():
     })
     # Call the function
     sensor_name = "OTT_Parsivel"
-    df_out = cast_column_dtypes(df, sensor_name, verbose=False)
+    df_out = cast_column_dtypes(df, sensor_name)
     # Check that the output dataframe has the correct column types
     assert str(df_out["time"].dtype) == "datetime64[s]"
     assert str(df_out["station_number"].dtype) == "object"
@@ -352,7 +352,7 @@ def test_cast_column_dtypes():
     # Assert raise error if can not cast
     df["altitude"] = "text"
     with pytest.raises(ValueError):
-        cast_column_dtypes(df, sensor_name, verbose=False)
+        cast_column_dtypes(df, sensor_name)


 def test_remove_rows_with_missing_time():
diff --git a/disdrodb/utils/netcdf.py b/disdrodb/utils/netcdf.py
index 1c46215c..0678e08f 100644
--- a/disdrodb/utils/netcdf.py
+++ b/disdrodb/utils/netcdf.py
@@ -401,7 +401,7 @@ def _concatenate_datasets(list_ds, dim="time", verbose=False):
     msg = "Start concatenating with xr.concat."
     log_info(logger=logger, msg=msg, verbose=verbose)

-    ds = xr.concat(list_ds, dim="time", coords="minimal", compat="override")
+    ds = xr.concat(list_ds, dim=dim, coords="minimal", compat="override")

     msg = "Concatenation with xr.concat has been successful."
     log_info(logger=logger, msg=msg, verbose=verbose)
diff --git a/pyproject.toml b/pyproject.toml
index 88c68b51..2933cb26 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -155,7 +155,7 @@ select = [
     # flake8-return
     "RET",
     # flake8-unused-arguments
-    # "ARG",
+    "ARG",
     # flake8-raise
     "RSE",
     # flake8-pytest-style
diff --git a/tutorials/reader_preparation.ipynb b/tutorials/reader_preparation.ipynb
index ccf9d2db..c6a940ef 100644
--- a/tutorials/reader_preparation.ipynb
+++ b/tutorials/reader_preparation.ipynb
@@ -276,7 +276,6 @@
    "    processed_dir=processed_dir,\n",
    "    station_name=station_name,\n",
    "    force=force,\n",
-   "    verbose=False,\n",
    "    product=\"L0A\",\n",
    ")"
   ]