Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

First draft of new climatology config #1356

Draft
wants to merge 7 commits into
base: main-dev
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
46 changes: 46 additions & 0 deletions pyaerocom/climatology_config.py
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should this module live in pyaerocom/aeroval? I don't really see this being used in the core API

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

As of now, it is passed though the colocator, meaning that it is part of the core. If this is moved from the core to aeroval, then some more work is needed

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If that's the case then, no, it should stay here in the core. We don't want to have pyaerocom depend on pyaeroval after all.

Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
from pydantic import BaseModel, ValidationError, field_validator

from typing import Literal

from pyaerocom import const


class ClimatologyConfig(BaseModel):
"""
Holds the configuration for the climatology

Attributes
-------------
start : int, optional
Start year of the climatology
stop : int, optional
Stop year of the climatology
resample_how : str, optional
How to resample the climatology. Must be mean or median.
freq : str, optional
Which frequency the climatology should have
mincount : dict, optional
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'd suggest min_count. Also maybe worth building a field_validator for this? Or possibly a child class of TypedDict

Number of values should be present for the data to be used in the climatology.
Dict where freqs are the keys and the count is the values

"""

start: int = const.CLIM_START
stop: int = const.CLIM_STOP

set_year: int | None = None

@field_validator("set_year")
@classmethod
def validate_set_year(cls, v):
if v is None:
return int((cls.stop - cls.start) // 2 + cls.start) + 1

Check warning on line 37 in pyaerocom/climatology_config.py

View check run for this annotation

Codecov / codecov/patch

pyaerocom/climatology_config.py#L36-L37

Added lines #L36 - L37 were not covered by tests

if v > cls.stop or v < cls.start:
raise ValidationError

Check warning on line 40 in pyaerocom/climatology_config.py

View check run for this annotation

Codecov / codecov/patch

pyaerocom/climatology_config.py#L39-L40

Added lines #L39 - L40 were not covered by tests

return v

Check warning on line 42 in pyaerocom/climatology_config.py

View check run for this annotation

Codecov / codecov/patch

pyaerocom/climatology_config.py#L42

Added line #L42 was not covered by tests

resample_how: Literal["mean", "median"] = const.CLIM_RESAMPLE_HOW
freq: str = const.CLIM_FREQ
mincount: dict = const.CLIM_MIN_COUNT
11 changes: 6 additions & 5 deletions pyaerocom/colocation/colocation_3d.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@

from pyaerocom import __version__ as pya_ver
from pyaerocom import const
from pyaerocom.climatology_config import ClimatologyConfig
from pyaerocom._lowlevel_helpers import LayerLimits, RegridResDeg
from pyaerocom.exceptions import (
DataUnitError,
Expand Down Expand Up @@ -311,7 +312,7 @@ def colocate_vertical_profile_gridded(
update_baseyear_gridded: int = None,
min_num_obs: int | dict | None = None,
colocate_time: bool = False,
use_climatology_ref: bool = False,
use_climatology_ref: dict = False,
resample_how: str | dict = None,
colocation_layer_limits: tuple[LayerLimits, ...] | None = None,
profile_layer_limits: tuple[LayerLimits, ...] | None = None,
Expand Down Expand Up @@ -412,10 +413,10 @@ def colocate_vertical_profile_gridded(
data = data.resample_time(str(ts_type), min_num_obs=min_num_obs, how=resample_how)
ts_type_data = ts_type

if use_climatology_ref: # pragma: no cover
col_freq = "monthly"
obs_start = const.CLIM_START
obs_stop = const.CLIM_STOP
if isinstance(use_climatology_ref, ClimatologyConfig): # pragma: no cover
col_freq = use_climatology_ref.freq
obs_start = use_climatology_ref.start
obs_stop = use_climatology_ref.stop
else:
col_freq = str(ts_type)
obs_start = start
Expand Down
23 changes: 19 additions & 4 deletions pyaerocom/colocation/colocation_setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
)

from pyaerocom import const
from pyaerocom.climatology_config import ClimatologyConfig
from pyaerocom._lowlevel_helpers import LayerLimits, RegridResDeg
from pyaerocom.config import ALL_REGION_NAME
from pyaerocom.helpers import start_stop
Expand Down Expand Up @@ -109,9 +110,9 @@
obs_data_dir : str, optional
location of obs data. If None, attempt to infer obs location based on
obs ID.
obs_use_climatology : bool
BETA if True, pyaerocom default climatology is computed from observation
stations (so far only possible for unrgidded / gridded colocation).
obs_use_climatology : ClimatologyConfig | bool, optional
Configuration for climatology. If True is given, a default configuration is made.
With False, climatology is turned off
obs_vert_type : str
AeroCom vertical code encoded in the model filenames (only AeroCom 3
and later). Specifies which model file should be read in case there are
Expand Down Expand Up @@ -379,7 +380,21 @@
obs_name: str | None = None
obs_data_dir: Path | str | None = None

obs_use_climatology: bool = False
obs_use_climatology: ClimatologyConfig | bool = False

@field_validator("obs_use_climatology")
@classmethod
def validate_obs_use_climatology(cls, v):
if isinstance(v, ClimatologyConfig):
return v

Check warning on line 389 in pyaerocom/colocation/colocation_setup.py

View check run for this annotation

Codecov / codecov/patch

pyaerocom/colocation/colocation_setup.py#L389

Added line #L389 was not covered by tests

if isinstance(v, bool):
if v:
return ClimatologyConfig()
else:
return v

raise ValidationError

Check warning on line 397 in pyaerocom/colocation/colocation_setup.py

View check run for this annotation

Codecov / codecov/patch

pyaerocom/colocation/colocation_setup.py#L397

Added line #L397 was not covered by tests

obs_cache_only: bool = False # only relevant if obs is ungridded
obs_vert_type: str | None = None
Expand Down
41 changes: 26 additions & 15 deletions pyaerocom/colocation/colocation_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@

from pyaerocom import __version__ as pya_ver
from pyaerocom import const
from pyaerocom.climatology_config import ClimatologyConfig
from pyaerocom._lowlevel_helpers import RegridResDeg
from pyaerocom.exceptions import (
DataUnitError,
Expand Down Expand Up @@ -427,8 +428,9 @@ def _colocate_site_data_helper(
to aggregate from hourly to daily, rather than the mean.
min_num_obs : int or dict, optional
minimum number of observations for resampling of time
use_climatology_ref : bool
if True, climatological timeseries are used from observations
use_climatology_ref : ClimateConfig | bool, optional
If provided, the climatology will be calculated from the config


Raises
------
Expand All @@ -448,8 +450,17 @@ def _colocate_site_data_helper(
var, ts_type=ts_type, how=resample_how, min_num_obs=min_num_obs, inplace=True
)[var]

if use_climatology_ref:
obs_ts = stat_data_ref.calc_climatology(var_ref, min_num_obs=min_num_obs)[var_ref]
if isinstance(use_climatology_ref, ClimatologyConfig):
obs_ts = stat_data_ref.calc_climatology(
var_ref,
start=use_climatology_ref.start,
stop=use_climatology_ref.stop,
min_num_obs=min_num_obs,
clim_mincount=use_climatology_ref.mincount,
resample_how=use_climatology_ref.resample_how,
clim_freq=use_climatology_ref.freq,
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Same here set_year should be set

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done

set_year=use_climatology_ref.set_year,
)[var_ref]
else:
obs_ts = stat_data_ref.resample_time(
var_ref,
Expand Down Expand Up @@ -501,26 +512,26 @@ def _colocate_site_data_helper_timecol(
to aggregate from hourly to daily, rather than the mean.
min_num_obs : int or dict, optional
minimum number of observations for resampling of time
use_climatology_ref : bool
if True, NotImplementedError is raised
use_climatology_ref: ClimateConfig | bool
if provided, NotImplementedError is raised

Raises
------
TemporalResolutionError
if model or obs sampling frequency is lower than desired output frequency
NotImplementedError
if input arg `use_climatology_ref` is True.
if input arg `use_climatology_ref` is provided.

Returns
-------
pandas.DataFrame
dataframe containing the colocated input data (column names are
data and ref)
"""
if use_climatology_ref:
if isinstance(use_climatology_ref, ClimatologyConfig):
raise NotImplementedError(
"Using observation climatology in colocation with option "
"colocate_time=True is not available yet ..."
"use_climatology_ref is not available yet ..."
)

grid_tst = stat_data.get_var_ts_type(var)
Expand Down Expand Up @@ -672,8 +683,8 @@ def colocate_gridded_ungridded(
if True and if original time resolution of data is higher than desired
time resolution (`ts_type`), then both datasets are colocated in time
*before* resampling to lower resolution.
use_climatology_ref : bool
if True, climatological timeseries are used from observations
use_climatology_ref : ClimateConfig | bool, optional.
Configuration for calculating the climatology. If set to a bool, this will not be done
resample_how : str or dict
string specifying how data should be aggregated when resampling in time.
Default is "mean". Can also be a nested dictionary, e.g.
Expand Down Expand Up @@ -757,10 +768,10 @@ def colocate_gridded_ungridded(
data = data.resample_time(str(ts_type), min_num_obs=min_num_obs, how=resample_how)
ts_type_data = ts_type

if use_climatology_ref:
col_freq = "monthly"
obs_start = const.CLIM_START
obs_stop = const.CLIM_STOP
if isinstance(use_climatology_ref, ClimatologyConfig): # pragma: no cover
col_freq = "monthly" # use_climatology_ref.freq
obs_start = use_climatology_ref.start
obs_stop = use_climatology_ref.stop
Comment on lines +771 to +774
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This seems like an old hack or something(?). Default const.CLIM_FREQ is daily, and is thus used everywhere climatology is calculated. But the col freq is set as monthly if climatology is used. Why?

And changing col_freq in the colocate_gridded_ungridded, is that smart? Then the value from colocation config is no longer immutable(?)

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Like we talked about, we should validate this asap, in the colocation_setup, so that no data is read before this change.

else:
col_freq = str(ts_type)
obs_start = start
Expand Down
45 changes: 38 additions & 7 deletions pyaerocom/stationdata.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,12 @@
import xarray as xr

from pyaerocom import const
from pyaerocom._lowlevel_helpers import BrowseDict, dict_to_str, list_to_shortstr, merge_dicts
from pyaerocom._lowlevel_helpers import (
BrowseDict,
dict_to_str,
list_to_shortstr,
merge_dicts,
)
from pyaerocom.exceptions import (
CoordinateError,
DataDimensionError,
Expand Down Expand Up @@ -379,7 +384,11 @@
return output

def get_meta(
self, force_single_value=True, quality_check=True, add_none_vals=False, add_meta_keys=None
self,
force_single_value=True,
quality_check=True,
add_none_vals=False,
add_meta_keys=None,
):
"""Return meta-data as dictionary

Expand Down Expand Up @@ -732,10 +741,18 @@
ts_type = self._check_ts_types_for_merge(other, var_name)

s0 = self.resample_time(
var_name, ts_type=ts_type, how=resample_how, min_num_obs=min_num_obs, inplace=True
var_name,
ts_type=ts_type,
how=resample_how,
min_num_obs=min_num_obs,
inplace=True,
)[var_name].dropna()
s1 = other.resample_time(
var_name, ts_type=ts_type, how=resample_how, min_num_obs=min_num_obs, inplace=True
var_name,
ts_type=ts_type,
how=resample_how,
min_num_obs=min_num_obs,
inplace=True,
)[var_name].dropna()

info = other.var_info[var_name]
Expand Down Expand Up @@ -1031,14 +1048,21 @@
if ts_type < TsType(
clim_freq
): # current resolution is lower than input climatological freq
supported = list(const.CLIM_MIN_COUNT)
if clim_mincount is None:
supported = list(const.CLIM_MIN_COUNT)

Check warning on line 1052 in pyaerocom/stationdata.py

View check run for this annotation

Codecov / codecov/patch

pyaerocom/stationdata.py#L1051-L1052

Added lines #L1051 - L1052 were not covered by tests
else:
supported = list(clim_mincount)

Check warning on line 1054 in pyaerocom/stationdata.py

View check run for this annotation

Codecov / codecov/patch

pyaerocom/stationdata.py#L1054

Added line #L1054 was not covered by tests
if str(ts_type) in supported:
clim_freq = str(ts_type)
else: # use monthly
clim_freq = "monthly"

data = self.resample_time(
var_name, ts_type=clim_freq, how=resample_how, min_num_obs=min_num_obs, inplace=False
var_name,
ts_type=clim_freq,
how=resample_how,
min_num_obs=min_num_obs,
inplace=False,
)
ts = data.to_timeseries(var_name)

Expand All @@ -1049,9 +1073,16 @@

if clim_mincount is None:
clim_mincount = const.CLIM_MIN_COUNT[clim_freq]
if isinstance(clim_mincount, dict):
clim_mincount = clim_mincount[clim_freq]

clim = calc_climatology(
ts, start, stop, min_count=clim_mincount, set_year=set_year, resample_how=resample_how
ts,
start,
stop,
min_count=clim_mincount,
set_year=set_year,
resample_how=resample_how,
)

new = StationData()
Expand Down
15 changes: 12 additions & 3 deletions tests/colocation/test_colocation_3d.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,13 +23,22 @@
@pytest.fixture
def fake_model_data_with_altitude():
longitude = iris.coords.DimCoord(
np.linspace(-15, 25, 20), var_name="lon", standard_name="longitude", units="degrees"
np.linspace(-15, 25, 20),
var_name="lon",
standard_name="longitude",
units="degrees",
)
latitude = iris.coords.DimCoord(
np.linspace(50, 55, 10), var_name="lat", standard_name="latitude", units="degrees"
np.linspace(50, 55, 10),
var_name="lat",
standard_name="latitude",
units="degrees",
)
altitude = iris.coords.DimCoord(
np.linspace(0, 60000, 10000), var_name="alt", standard_name="altitude", units="meters"
np.linspace(0, 60000, 10000),
var_name="alt",
standard_name="altitude",
units="meters",
)
time = iris.coords.DimCoord(
np.arange(18892, 18892 + 7, 1),
Expand Down
Loading