Skip to content

Commit e1981ae

Browse files
author
Joe Hamman
authored
Merge pull request #28 from dgergel/feature/implement_daily_bcsd
Feature/implement daily bcsd
2 parents 233b028 + f9ee410 commit e1981ae

File tree

7 files changed

+246
-53
lines changed

7 files changed

+246
-53
lines changed

docs/api.rst

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,3 +38,13 @@ Transformers
3838

3939
LinearTrendTransformer
4040
QuantileMapper
41+
42+
Groupers
43+
~~~~~~~~~~~~
44+
45+
.. autosummary::
46+
:toctree: generated/
47+
48+
DAY_GROUPER
49+
MONTH_GROUPER
50+
PaddedDOYGrouper
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
from .bcsd import BcsdPrecipitation, BcsdTemperature
22
from .core import PointWiseDownscaler
33
from .gard import AnalogRegression, PureAnalog
4+
from .groupers import DAY_GROUPER, MONTH_GROUPER, PaddedDOYGrouper
45
from .utils import LinearTrendTransformer, QuantileMapper
56
from .zscore import ZScoreRegressor

skdownscale/pointwise_models/base.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -83,6 +83,7 @@ def _validate_data(self, X, y=None, reset=True, validate_separately=False, **che
8383
X, y = self._check_X_y(X, y, **check_params)
8484
out = X, y
8585

86+
# TO-DO: add check_n_features attribute
8687
if check_params.get('ensure_2d', True):
8788
self._check_n_features(X, reset=reset)
8889

skdownscale/pointwise_models/bcsd.py

Lines changed: 98 additions & 46 deletions
Original file line numberDiff line numberDiff line change
@@ -5,33 +5,58 @@
55
from sklearn.utils.validation import check_is_fitted
66

77
from .base import TimeSynchronousDownscaler
8-
from .utils import QuantileMapper
9-
10-
11-
def MONTH_GROUPER(x):
12-
return x.month
8+
from .groupers import DAY_GROUPER, MONTH_GROUPER, PaddedDOYGrouper
9+
from .utils import QuantileMapper, ensure_samples_features
1310

1411

1512
class BcsdBase(TimeSynchronousDownscaler):
16-
""" Base class for BCSD model.
17-
"""
13+
"""Base class for BCSD model."""
1814

1915
_fit_attributes = ['y_climo_', 'quantile_mappers_']
2016
_timestep = 'M'
2117

22-
def __init__(self, time_grouper=MONTH_GROUPER, return_anoms=True, qm_kwargs={}):
18+
def __init__(
19+
self,
20+
time_grouper=MONTH_GROUPER,
21+
climate_trend_grouper=DAY_GROUPER,
22+
climate_trend=MONTH_GROUPER,
23+
return_anoms=True,
24+
qm_kwargs={},
25+
):
26+
2327
self.time_grouper = time_grouper
28+
self.climate_trend_grouper = climate_trend_grouper
29+
self.climate_trend = climate_trend
2430
self.return_anoms = return_anoms
2531
self.qm_kwargs = qm_kwargs
2632

2733
def _pre_fit(self):
2834
if isinstance(self.time_grouper, str):
29-
self.time_grouper_ = pd.Grouper(freq=self.time_grouper)
35+
if self.time_grouper == 'daily_nasa-nex':
36+
self.time_grouper = PaddedDOYGrouper
37+
self.timestep = 'daily'
38+
else:
39+
self.time_grouper_ = pd.Grouper(freq=self.time_grouper)
40+
self.timestep = 'monthly'
3041
else:
3142
self.time_grouper_ = self.time_grouper
43+
self.timestep = 'monthly'
44+
45+
def _create_groups(self, df, climate_trend=False):
46+
"""helper function to create groups by either daily or month"""
47+
if self.timestep == 'monthly':
48+
return df.groupby(self.time_grouper)
49+
elif self.timestep == 'daily':
50+
if climate_trend:
51+
# group by day only rather than also +/- offset days
52+
return df.groupby(self.climate_trend_grouper)
53+
else:
54+
return self.time_grouper(df)
55+
else:
56+
raise TypeError('unexpected time grouper type %s' % self.time_grouper)
3257

3358
def _qm_fit_by_group(self, groups):
34-
""" helper function to fit quantile mappers by group
59+
"""helper function to fit quantile mappers by group
3560
3661
Note that we store these mappers for later
3762
"""
@@ -40,7 +65,7 @@ def _qm_fit_by_group(self, groups):
4065
self.quantile_mappers_[key] = QuantileMapper(**self.qm_kwargs).fit(group)
4166

4267
def _qm_transform_by_group(self, groups):
43-
""" helper function to apply quantile mapping by group
68+
"""helper function to apply quantile mapping by group
4469
4570
Note that we recombine the dataframes using pd.concat, there may be a better way to do this
4671
"""
@@ -51,9 +76,22 @@ def _qm_transform_by_group(self, groups):
5176
dfs.append(pd.DataFrame(qmapped, index=group.index, columns=group.columns))
5277
return pd.concat(dfs).sort_index()
5378

79+
def _remove_climatology(self, obj, climatology, climate_trend=False):
80+
"""helper function to remove climatologies"""
81+
dfs = []
82+
for key, group in self._create_groups(obj, climate_trend):
83+
if self.timestep == 'monthly':
84+
dfs.append(group - climatology.loc[key].values)
85+
elif self.timestep == 'daily':
86+
dfs.append(group - climatology.loc[key])
87+
88+
result = pd.concat(dfs).sort_index()
89+
assert obj.shape == result.shape
90+
return result
91+
5492

5593
class BcsdPrecipitation(BcsdBase):
56-
""" Classic BCSD model for Precipitation
94+
"""Classic BCSD model for Precipitation
5795
5896
Parameters
5997
----------
@@ -72,7 +110,7 @@ class BcsdPrecipitation(BcsdBase):
72110
"""
73111

74112
def fit(self, X, y):
75-
""" Fit BcsdPrecipitation model
113+
"""Fit BcsdPrecipitation model
76114
77115
Parameters
78116
----------
@@ -88,16 +126,19 @@ def fit(self, X, y):
88126

89127
self._pre_fit()
90128
X, y = self._validate_data(X, y, y_numeric=True)
129+
# TO-DO: set n_features_n attribute
91130
if self.n_features_in_ != 1:
92131
raise ValueError(f'BCSD only supports 1 feature, found {self.n_features_in_}')
93132

94-
y_groups = y.groupby(self.time_grouper)
133+
y_groups = self._create_groups(y)
95134
# calculate the climatologies
96135
self.y_climo_ = y_groups.mean()
136+
97137
if self.y_climo_.values.min() <= 0:
98138
raise ValueError('Invalid value in target climatology')
99139

100140
# fit the quantile mappers
141+
# TO-DO: do we need to detrend the data before fitting the quantile mappers??
101142
self._qm_fit_by_group(y_groups)
102143

103144
return self
@@ -119,23 +160,28 @@ def predict(self, X):
119160
X = self._validate_data(X)
120161

121162
# Bias correction
122-
# apply quantile mapping by month
123-
Xqm = self._qm_transform_by_group(X.groupby(self.time_grouper))
163+
# apply quantile mapping by month or day
164+
Xqm = self._qm_transform_by_group(self._create_groups(X, climate_trend=True))
124165

125166
# calculate the anomalies as a ratio of the training data
126167
if self.return_anoms:
127168
return self._calc_ratio_anoms(Xqm, self.y_climo_)
128169
else:
129170
return Xqm
130171

131-
def _calc_ratio_anoms(self, obj, climatology):
172+
def _calc_ratio_anoms(self, obj, climatology, climate_trend=False):
173+
"""helper function for dividing day groups by climatology"""
132174
dfs = []
133-
for key, group in obj.groupby(self.time_grouper):
134-
dfs.append(group / climatology.loc[key].values)
175+
for key, group in self._create_groups(obj, climate_trend):
176+
if self.timestep == 'monthly':
177+
dfs.append(group / climatology.loc[key].values)
178+
else:
179+
dfs.append(group / climatology.loc[key])
180+
181+
result = pd.concat(dfs).sort_index()
182+
assert obj.shape == result.shape
135183

136-
out = pd.concat(dfs).sort_index()
137-
assert obj.shape == out.shape
138-
return out
184+
return result
139185

140186
def _more_tags(self):
141187
return {
@@ -162,7 +208,7 @@ def _more_tags(self):
162208

163209
class BcsdTemperature(BcsdBase):
164210
def fit(self, X, y):
165-
""" Fit BcsdTemperature model
211+
"""Fit BcsdTemperature model
166212
167213
Parameters
168214
----------
@@ -175,14 +221,18 @@ def fit(self, X, y):
175221
-------
176222
self : returns an instance of self.
177223
"""
224+
178225
self._pre_fit()
179226
X, y = self._validate_data(X, y, y_numeric=True)
227+
# TO-DO: set n_features_in attribute
180228
if self.n_features_in_ != 1:
181-
raise ValueError(f'BCSD only supports 1 feature, found {self.n_features_in_}')
229+
raise ValueError(f'BCSD only supports up to 4 features, found {self.n_features_in_}')
230+
231+
# make groups for day or month
232+
y_groups = self._create_groups(y)
182233

183234
# calculate the climatologies
184-
self._x_climo = X.groupby(self.time_grouper).mean()
185-
y_groups = y.groupby(self.time_grouper)
235+
self._x_climo = self._create_groups(X).mean()
186236
self.y_climo_ = y_groups.mean()
187237

188238
# fit the quantile mappers
@@ -191,7 +241,7 @@ def fit(self, X, y):
191241
return self
192242

193243
def predict(self, X):
194-
""" Predict using the BcsdTemperature model
244+
"""Predict using the BcsdTemperature model
195245
196246
Parameters
197247
----------
@@ -206,42 +256,44 @@ def predict(self, X):
206256
check_is_fitted(self)
207257
X = self._check_array(X)
208258

209-
# X = ensure_samples_features(X) # don't need????
210-
211259
# Calculate the 9-year running mean for each month
212260
def rolling_func(x):
213261
return x.rolling(9, center=True, min_periods=1).mean()
214262

215-
X_rolling_mean = X.groupby(self.time_grouper, group_keys=False).apply(rolling_func)
263+
X_rolling_mean = X.groupby(self.climate_trend, group_keys=False).apply(rolling_func)
216264

217-
# calc shift
218-
# why isn't this working??
219-
# X_shift = X_rolling_mean.groupby(self.time_grouper) - self._x_climo
220-
X_shift = self._remove_climatology(X_rolling_mean, self._x_climo)
265+
# remove climatology from 9-year monthly mean climate trend
266+
X_shift = self._remove_climatology(X_rolling_mean, self._x_climo, climate_trend=True)
221267

222-
# remove shift
268+
# remove shift from model data
223269
X_no_shift = X - X_shift
224270

225271
# Bias correction
226-
# apply quantile mapping by month
227-
Xqm = self._qm_transform_by_group(X_no_shift.groupby(self.time_grouper))
272+
# apply quantile mapping by month or day
273+
Xqm = self._qm_transform_by_group(self._create_groups(X_no_shift, climate_trend=True))
228274

229-
# restore the shift
275+
# restore the climate trend
230276
X_qm_with_shift = X_shift + Xqm
231-
# calculate the anomalies
277+
278+
# return bias corrected absolute values or calculate the anomalies
232279
if self.return_anoms:
233280
return self._remove_climatology(X_qm_with_shift, self.y_climo_)
234281
else:
235282
return X_qm_with_shift
236283

237-
def _remove_climatology(self, obj, climatology):
284+
def _remove_climatology(self, obj, climatology, climate_trend=False):
285+
"""helper function to remove climatologies"""
238286
dfs = []
239-
for key, group in obj.groupby(self.time_grouper):
240-
dfs.append(group - climatology.loc[key].values)
241-
242-
out = pd.concat(dfs).sort_index()
243-
assert obj.shape == out.shape
244-
return out
287+
for key, group in self._create_groups(obj, climate_trend):
288+
if self.timestep == 'monthly':
289+
dfs.append(group - climatology.loc[key].values)
290+
elif self.timestep == 'daily':
291+
dfs.append(group - climatology.loc[key].values)
292+
293+
result = pd.concat(dfs).sort_index()
294+
if obj.shape != result.shape:
295+
raise ValueError('shape of climo is not equal to input array')
296+
return result
245297

246298
def _more_tags(self):
247299
return {
Lines changed: 89 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,89 @@
1+
import warnings
2+
3+
import numpy as np
4+
import pandas as pd
5+
6+
7+
class SkdownscaleGroupGeneratorBase:
8+
pass
9+
10+
11+
def MONTH_GROUPER(x):
12+
return x.month
13+
14+
15+
def DAY_GROUPER(x):
16+
return x.day
17+
18+
19+
class PaddedDOYGrouper(SkdownscaleGroupGeneratorBase):
20+
def __init__(self, df, offset=15):
21+
self.n = 1
22+
self.df = df
23+
self.max = 366
24+
# check for leap days
25+
# if leap days present, flag for day groups count
26+
if len(self.df[((self.df.index.month == 2) & (self.df.index.day == 29))]) > 0:
27+
self.leap = 'leap'
28+
else:
29+
self.leap = 'noleap'
30+
# split up data by leap and non leap years
31+
# necessary because pandas dayofyear
32+
self.df_leap = self.df[self.df.index.is_leap_year]
33+
self.df_noleap = self.df[~self.df.index.is_leap_year]
34+
self.offset = offset
35+
self.days_of_nonleap_year = np.arange(self.n, self.max)
36+
self.days_of_leap_year = np.arange(self.n, self.max + 1)
37+
self.days_of_nonleap_year_wrapped = np.pad(
38+
self.days_of_nonleap_year, self.offset, mode='wrap'
39+
)
40+
self.days_of_leap_year_wrapped = np.pad(self.days_of_leap_year, self.offset, mode='wrap')
41+
42+
def __iter__(self):
43+
self.n = 1
44+
return self
45+
46+
def __next__(self):
47+
# n as day of year
48+
if self.n > self.max:
49+
raise StopIteration
50+
51+
i = self.n - 1
52+
total_days = (2 * self.offset) + 1
53+
54+
# create day groups with +/- offset # of days
55+
first_set_leap = self.days_of_leap_year_wrapped[i : i + self.offset]
56+
first_set_noleap = self.days_of_nonleap_year_wrapped[i : i + self.offset]
57+
58+
sec_set_leap = self.days_of_leap_year_wrapped[self.n + self.offset : i + total_days]
59+
sec_set_noleap = self.days_of_nonleap_year_wrapped[self.n + self.offset : i + total_days]
60+
61+
all_days_leap = np.concatenate((first_set_leap, np.array([self.n]), sec_set_leap), axis=0)
62+
all_days_noleap = np.concatenate(
63+
(first_set_noleap, np.array([self.n]), sec_set_noleap), axis=0
64+
)
65+
66+
# check that day groups contain the correct number of days
67+
if len(set(all_days_leap)) != total_days and self.leap == 'noleap':
68+
warnings.warn('leap days not included, day groups in leap years missing leap days')
69+
70+
if len(set(all_days_noleap)) != total_days and self.n != 366:
71+
raise ValueError('no leap day groups do not contain the correct set of days')
72+
73+
result = pd.concat(
74+
[
75+
self.df_leap[self.df_leap.index.dayofyear.isin(all_days_leap)],
76+
self.df_noleap[self.df_noleap.index.dayofyear.isin(all_days_noleap)],
77+
]
78+
)
79+
80+
self.n += 1
81+
82+
return self.n - 1, result
83+
84+
def mean(self):
85+
arr_means = np.full((self.max, 1), np.inf)
86+
for key, group in self:
87+
arr_means[key - 1] = group.mean().values[0]
88+
result = pd.DataFrame(arr_means, index=self.days_of_leap_year)
89+
return result

0 commit comments

Comments
 (0)