Merge pull request #28 from dgergel/feature/implement_daily_bcsd
Feature/implement daily bcsd
Joe Hamman authored Nov 5, 2020
2 parents 233b028 + f9ee410 commit e1981ae
Showing 7 changed files with 246 additions and 53 deletions.
10 changes: 10 additions & 0 deletions docs/api.rst
@@ -38,3 +38,13 @@ Transformers

LinearTrendTransformer
QuantileMapper

Groupers
~~~~~~~~~~~~

.. autosummary::
:toctree: generated/

DAY_GROUPER
MONTH_GROUPER
PaddedDOYGrouper
1 change: 1 addition & 0 deletions skdownscale/pointwise_models/__init__.py
@@ -1,5 +1,6 @@
from .bcsd import BcsdPrecipitation, BcsdTemperature
from .core import PointWiseDownscaler
from .gard import AnalogRegression, PureAnalog
from .groupers import DAY_GROUPER, MONTH_GROUPER, PaddedDOYGrouper
from .utils import LinearTrendTransformer, QuantileMapper
from .zscore import ZScoreRegressor
1 change: 1 addition & 0 deletions skdownscale/pointwise_models/base.py
@@ -83,6 +83,7 @@ def _validate_data(self, X, y=None, reset=True, validate_separately=False, **che
X, y = self._check_X_y(X, y, **check_params)
out = X, y

# TO-DO: add check_n_features attribute
if check_params.get('ensure_2d', True):
self._check_n_features(X, reset=reset)

144 changes: 98 additions & 46 deletions skdownscale/pointwise_models/bcsd.py
@@ -5,33 +5,58 @@
from sklearn.utils.validation import check_is_fitted

from .base import TimeSynchronousDownscaler
from .utils import QuantileMapper


def MONTH_GROUPER(x):
return x.month
from .groupers import DAY_GROUPER, MONTH_GROUPER, PaddedDOYGrouper
from .utils import QuantileMapper, ensure_samples_features


class BcsdBase(TimeSynchronousDownscaler):
""" Base class for BCSD model.
"""
"""Base class for BCSD model."""

_fit_attributes = ['y_climo_', 'quantile_mappers_']
_timestep = 'M'

def __init__(self, time_grouper=MONTH_GROUPER, return_anoms=True, qm_kwargs={}):
def __init__(
self,
time_grouper=MONTH_GROUPER,
climate_trend_grouper=DAY_GROUPER,
climate_trend=MONTH_GROUPER,
return_anoms=True,
qm_kwargs={},
):

self.time_grouper = time_grouper
self.climate_trend_grouper = climate_trend_grouper
self.climate_trend = climate_trend
self.return_anoms = return_anoms
self.qm_kwargs = qm_kwargs

def _pre_fit(self):
if isinstance(self.time_grouper, str):
self.time_grouper_ = pd.Grouper(freq=self.time_grouper)
if self.time_grouper == 'daily_nasa-nex':
self.time_grouper = PaddedDOYGrouper
self.timestep = 'daily'
else:
self.time_grouper_ = pd.Grouper(freq=self.time_grouper)
self.timestep = 'monthly'
else:
self.time_grouper_ = self.time_grouper
self.timestep = 'monthly'

def _create_groups(self, df, climate_trend=False):
"""helper function to create groups by either daily or month"""
if self.timestep == 'monthly':
return df.groupby(self.time_grouper)
elif self.timestep == 'daily':
if climate_trend:
# group by day only rather than also +/- offset days
return df.groupby(self.climate_trend_grouper)
else:
return self.time_grouper(df)
else:
raise TypeError('unexpected time grouper type %s' % self.time_grouper)

def _qm_fit_by_group(self, groups):
""" helper function to fit quantile mappers by group
"""helper function to fit quantile mappers by group
Note that we store these mappers for later
"""
@@ -40,7 +65,7 @@ def _qm_fit_by_group(self, groups):
self.quantile_mappers_[key] = QuantileMapper(**self.qm_kwargs).fit(group)

def _qm_transform_by_group(self, groups):
""" helper function to apply quantile mapping by group
"""helper function to apply quantile mapping by group
Note that we recombine the dataframes using pd.concat, there may be a better way to do this
"""
@@ -51,9 +76,22 @@ def _qm_transform_by_group(self, groups):
dfs.append(pd.DataFrame(qmapped, index=group.index, columns=group.columns))
return pd.concat(dfs).sort_index()

def _remove_climatology(self, obj, climatology, climate_trend=False):
"""helper function to remove climatologies"""
dfs = []
for key, group in self._create_groups(obj, climate_trend):
if self.timestep == 'monthly':
dfs.append(group - climatology.loc[key].values)
elif self.timestep == 'daily':
dfs.append(group - climatology.loc[key])

result = pd.concat(dfs).sort_index()
assert obj.shape == result.shape
return result


class BcsdPrecipitation(BcsdBase):
""" Classic BCSD model for Precipitation
"""Classic BCSD model for Precipitation
Parameters
----------
@@ -72,7 +110,7 @@ class BcsdPrecipitation(BcsdBase):
"""

def fit(self, X, y):
""" Fit BcsdPrecipitation model
"""Fit BcsdPrecipitation model
Parameters
----------
@@ -88,16 +126,19 @@ def fit(self, X, y):

self._pre_fit()
X, y = self._validate_data(X, y, y_numeric=True)
# TO-DO: set n_features_n attribute
if self.n_features_in_ != 1:
raise ValueError(f'BCSD only supports 1 feature, found {self.n_features_in_}')

y_groups = y.groupby(self.time_grouper)
y_groups = self._create_groups(y)
# calculate the climatologies
self.y_climo_ = y_groups.mean()

if self.y_climo_.values.min() <= 0:
raise ValueError('Invalid value in target climatology')

# fit the quantile mappers
# TO-DO: do we need to detrend the data before fitting the quantile mappers??
self._qm_fit_by_group(y_groups)

return self
@@ -119,23 +160,28 @@ def predict(self, X):
X = self._validate_data(X)

# Bias correction
# apply quantile mapping by month
Xqm = self._qm_transform_by_group(X.groupby(self.time_grouper))
# apply quantile mapping by month or day
Xqm = self._qm_transform_by_group(self._create_groups(X, climate_trend=True))

# calculate the anomalies as a ratio of the training data
if self.return_anoms:
return self._calc_ratio_anoms(Xqm, self.y_climo_)
else:
return Xqm

def _calc_ratio_anoms(self, obj, climatology):
def _calc_ratio_anoms(self, obj, climatology, climate_trend=False):
"""helper function for dividing day groups by climatology"""
dfs = []
for key, group in obj.groupby(self.time_grouper):
dfs.append(group / climatology.loc[key].values)
for key, group in self._create_groups(obj, climate_trend):
if self.timestep == 'monthly':
dfs.append(group / climatology.loc[key].values)
else:
dfs.append(group / climatology.loc[key])

result = pd.concat(dfs).sort_index()
assert obj.shape == result.shape

out = pd.concat(dfs).sort_index()
assert obj.shape == out.shape
return out
return result

def _more_tags(self):
return {
@@ -162,7 +208,7 @@ def _more_tags(self):

class BcsdTemperature(BcsdBase):
def fit(self, X, y):
""" Fit BcsdTemperature model
"""Fit BcsdTemperature model
Parameters
----------
@@ -175,14 +221,18 @@ def fit(self, X, y):
-------
self : returns an instance of self.
"""

self._pre_fit()
X, y = self._validate_data(X, y, y_numeric=True)
# TO-DO: set n_features_in attribute
if self.n_features_in_ != 1:
raise ValueError(f'BCSD only supports 1 feature, found {self.n_features_in_}')
raise ValueError(f'BCSD only supports up to 4 features, found {self.n_features_in_}')

# make groups for day or month
y_groups = self._create_groups(y)

# calculate the climatologies
self._x_climo = X.groupby(self.time_grouper).mean()
y_groups = y.groupby(self.time_grouper)
self._x_climo = self._create_groups(X).mean()
self.y_climo_ = y_groups.mean()

# fit the quantile mappers
@@ -191,7 +241,7 @@ def fit(self, X, y):
return self

def predict(self, X):
""" Predict using the BcsdTemperature model
"""Predict using the BcsdTemperature model
Parameters
----------
@@ -206,42 +256,44 @@ def predict(self, X):
check_is_fitted(self)
X = self._check_array(X)

# X = ensure_samples_features(X) # don't need????

# Calculate the 9-year running mean for each month
def rolling_func(x):
return x.rolling(9, center=True, min_periods=1).mean()

X_rolling_mean = X.groupby(self.time_grouper, group_keys=False).apply(rolling_func)
X_rolling_mean = X.groupby(self.climate_trend, group_keys=False).apply(rolling_func)

# calc shift
# why isn't this working??
# X_shift = X_rolling_mean.groupby(self.time_grouper) - self._x_climo
X_shift = self._remove_climatology(X_rolling_mean, self._x_climo)
# remove climatology from 9-year monthly mean climate trend
X_shift = self._remove_climatology(X_rolling_mean, self._x_climo, climate_trend=True)

# remove shift
# remove shift from model data
X_no_shift = X - X_shift

# Bias correction
# apply quantile mapping by month
Xqm = self._qm_transform_by_group(X_no_shift.groupby(self.time_grouper))
# apply quantile mapping by month or day
Xqm = self._qm_transform_by_group(self._create_groups(X_no_shift, climate_trend=True))

# restore the shift
# restore the climate trend
X_qm_with_shift = X_shift + Xqm
# calculate the anomalies

# return bias corrected absolute values or calculate the anomalies
if self.return_anoms:
return self._remove_climatology(X_qm_with_shift, self.y_climo_)
else:
return X_qm_with_shift

def _remove_climatology(self, obj, climatology):
def _remove_climatology(self, obj, climatology, climate_trend=False):
"""helper function to remove climatologies"""
dfs = []
for key, group in obj.groupby(self.time_grouper):
dfs.append(group - climatology.loc[key].values)

out = pd.concat(dfs).sort_index()
assert obj.shape == out.shape
return out
for key, group in self._create_groups(obj, climate_trend):
if self.timestep == 'monthly':
dfs.append(group - climatology.loc[key].values)
elif self.timestep == 'daily':
dfs.append(group - climatology.loc[key].values)

result = pd.concat(dfs).sort_index()
if obj.shape != result.shape:
raise ValueError('shape of climo is not equal to input array')
return result

def _more_tags(self):
return {
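
The changes to bcsd.py above let BcsdTemperature (and BcsdPrecipitation) switch from monthly grouping to the padded day-of-year grouping when the string 'daily_nasa-nex' is passed as time_grouper; _pre_fit then substitutes PaddedDOYGrouper for the pandas monthly grouper. The following is only a rough, unverified sketch of how that entry point might be exercised; the column name and synthetic data are placeholders and not part of this PR:

    import numpy as np
    import pandas as pd

    from skdownscale.pointwise_models import BcsdTemperature

    # synthetic single-column daily series (fit() raises if more than one
    # feature column is supplied)
    index = pd.date_range('1990-01-01', '1999-12-31', freq='D')
    X = pd.DataFrame(
        {'tmax': 15 + 10 * np.sin(2 * np.pi * index.dayofyear / 365.25)}, index=index
    )
    y = X + 1.5  # stand-in "observed" series with a constant bias

    # 'daily_nasa-nex' selects the PaddedDOYGrouper-based daily pathway in _pre_fit
    model = BcsdTemperature(time_grouper='daily_nasa-nex', return_anoms=False)
    model.fit(X, y)
    downscaled = model.predict(X)
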
89 changes: 89 additions & 0 deletions skdownscale/pointwise_models/groupers.py
@@ -0,0 +1,89 @@
import warnings

import numpy as np
import pandas as pd


class SkdownscaleGroupGeneratorBase:
pass


def MONTH_GROUPER(x):
return x.month


def DAY_GROUPER(x):
return x.day


class PaddedDOYGrouper(SkdownscaleGroupGeneratorBase):
def __init__(self, df, offset=15):
self.n = 1
self.df = df
self.max = 366
# check for leap days
# if leap days present, flag for day groups count
if len(self.df[((self.df.index.month == 2) & (self.df.index.day == 29))]) > 0:
self.leap = 'leap'
else:
self.leap = 'noleap'
# split up data by leap and non leap years
# necessary because pandas dayofyear
self.df_leap = self.df[self.df.index.is_leap_year]
self.df_noleap = self.df[~self.df.index.is_leap_year]
self.offset = offset
self.days_of_nonleap_year = np.arange(self.n, self.max)
self.days_of_leap_year = np.arange(self.n, self.max + 1)
self.days_of_nonleap_year_wrapped = np.pad(
self.days_of_nonleap_year, self.offset, mode='wrap'
)
self.days_of_leap_year_wrapped = np.pad(self.days_of_leap_year, self.offset, mode='wrap')

def __iter__(self):
self.n = 1
return self

def __next__(self):
# n as day of year
if self.n > self.max:
raise StopIteration

i = self.n - 1
total_days = (2 * self.offset) + 1

# create day groups with +/- offset # of days
first_set_leap = self.days_of_leap_year_wrapped[i : i + self.offset]
first_set_noleap = self.days_of_nonleap_year_wrapped[i : i + self.offset]

sec_set_leap = self.days_of_leap_year_wrapped[self.n + self.offset : i + total_days]
sec_set_noleap = self.days_of_nonleap_year_wrapped[self.n + self.offset : i + total_days]

all_days_leap = np.concatenate((first_set_leap, np.array([self.n]), sec_set_leap), axis=0)
all_days_noleap = np.concatenate(
(first_set_noleap, np.array([self.n]), sec_set_noleap), axis=0
)

# check that day groups contain the correct number of days
if len(set(all_days_leap)) != total_days and self.leap == 'noleap':
warnings.warn('leap days not included, day groups in leap years missing leap days')

if len(set(all_days_noleap)) != total_days and self.n != 366:
raise ValueError('no leap day groups do not contain the correct set of days')

result = pd.concat(
[
self.df_leap[self.df_leap.index.dayofyear.isin(all_days_leap)],
self.df_noleap[self.df_noleap.index.dayofyear.isin(all_days_noleap)],
]
)

self.n += 1

return self.n - 1, result

def mean(self):
arr_means = np.full((self.max, 1), np.inf)
for key, group in self:
arr_means[key - 1] = group.mean().values[0]
result = pd.DataFrame(arr_means, index=self.days_of_leap_year)
return result
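
To illustrate the iteration contract of the new PaddedDOYGrouper, the sketch below (not part of the PR; the toy DataFrame is made up) walks the generator over a daily series covering one non-leap and one leap year. Each iteration yields a day-of-year key together with every row falling within +/- offset calendar days of that day, wrapping across the year boundary, and mean() collapses those windows into a per-day climatology:

    import numpy as np
    import pandas as pd

    from skdownscale.pointwise_models import PaddedDOYGrouper

    index = pd.date_range('1999-01-01', '2000-12-31', freq='D')  # 2000 is a leap year
    df = pd.DataFrame({'tmax': np.random.rand(len(index))}, index=index)

    grouper = PaddedDOYGrouper(df, offset=15)
    for doy, window in grouper:
        # `window` holds all rows whose day of year lies within 15 days of `doy`
        pass

    climatology = grouper.mean()  # one padded-window mean per day of year (1-366)
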