5
5
from sklearn .utils .validation import check_is_fitted
6
6
7
7
from .base import TimeSynchronousDownscaler
8
- from .utils import QuantileMapper
9
-
10
-
11
- def MONTH_GROUPER (x ):
12
- return x .month
8
+ from .groupers import DAY_GROUPER , MONTH_GROUPER , PaddedDOYGrouper
9
+ from .utils import QuantileMapper , ensure_samples_features
13
10
14
11
15
12
class BcsdBase (TimeSynchronousDownscaler ):
16
- """ Base class for BCSD model.
17
- """
13
+ """Base class for BCSD model."""
18
14
19
15
_fit_attributes = ['y_climo_' , 'quantile_mappers_' ]
20
16
_timestep = 'M'
21
17
22
def __init__(
    self,
    time_grouper=MONTH_GROUPER,
    climate_trend_grouper=DAY_GROUPER,
    climate_trend=MONTH_GROUPER,
    return_anoms=True,
    qm_kwargs={},
):
    """Store the estimator's hyper-parameters without validating them.

    Parameters
    ----------
    time_grouper : str or callable
        Grouping used for climatologies and quantile mapping. ``_pre_fit``
        treats the string ``'daily_nasa-nex'`` specially (padded
        day-of-year grouping); any other string is passed to
        ``pd.Grouper(freq=...)``; non-string values are used as given.
    climate_trend_grouper : callable
        Grouper used by ``_create_groups`` in daily mode when
        ``climate_trend=True`` is requested.
    climate_trend : callable
        Grouper passed to ``groupby`` when ``BcsdTemperature.predict``
        computes its rolling-mean trend.
    return_anoms : bool
        When true, ``predict`` returns anomalies relative to the target
        climatology instead of the bias-corrected values.
    qm_kwargs : dict
        Keyword arguments forwarded to ``QuantileMapper``.
    """
    # NOTE: the ``{}`` default is one dict shared by every instance that
    # relies on it; it is only ever unpacked, so it must never be mutated.
    self.qm_kwargs = qm_kwargs
    self.return_anoms = return_anoms

    # Grouping configuration is resolved later, in ``_pre_fit``.
    self.climate_trend = climate_trend
    self.climate_trend_grouper = climate_trend_grouper
    self.time_grouper = time_grouper
26
32
27
33
def _pre_fit (self ):
28
34
if isinstance (self .time_grouper , str ):
29
- self .time_grouper_ = pd .Grouper (freq = self .time_grouper )
35
+ if self .time_grouper == 'daily_nasa-nex' :
36
+ self .time_grouper = PaddedDOYGrouper
37
+ self .timestep = 'daily'
38
+ else :
39
+ self .time_grouper_ = pd .Grouper (freq = self .time_grouper )
40
+ self .timestep = 'monthly'
30
41
else :
31
42
self .time_grouper_ = self .time_grouper
43
+ self .timestep = 'monthly'
44
+
45
+ def _create_groups (self , df , climate_trend = False ):
46
+ """helper function to create groups by either daily or month"""
47
+ if self .timestep == 'monthly' :
48
+ return df .groupby (self .time_grouper )
49
+ elif self .timestep == 'daily' :
50
+ if climate_trend :
51
+ # group by day only rather than also +/- offset days
52
+ return df .groupby (self .climate_trend_grouper )
53
+ else :
54
+ return self .time_grouper (df )
55
+ else :
56
+ raise TypeError ('unexpected time grouper type %s' % self .time_grouper )
32
57
33
58
def _qm_fit_by_group (self , groups ):
34
- """ helper function to fit quantile mappers by group
59
+ """helper function to fit quantile mappers by group
35
60
36
61
Note that we store these mappers for later
37
62
"""
@@ -40,7 +65,7 @@ def _qm_fit_by_group(self, groups):
40
65
self .quantile_mappers_ [key ] = QuantileMapper (** self .qm_kwargs ).fit (group )
41
66
42
67
def _qm_transform_by_group (self , groups ):
43
- """ helper function to apply quantile mapping by group
68
+ """helper function to apply quantile mapping by group
44
69
45
70
Note that we recombine the dataframes using pd.concat, there may be a better way to do this
46
71
"""
@@ -51,9 +76,22 @@ def _qm_transform_by_group(self, groups):
51
76
dfs .append (pd .DataFrame (qmapped , index = group .index , columns = group .columns ))
52
77
return pd .concat (dfs ).sort_index ()
53
78
79
+ def _remove_climatology (self , obj , climatology , climate_trend = False ):
80
+ """helper function to remove climatologies"""
81
+ dfs = []
82
+ for key , group in self ._create_groups (obj , climate_trend ):
83
+ if self .timestep == 'monthly' :
84
+ dfs .append (group - climatology .loc [key ].values )
85
+ elif self .timestep == 'daily' :
86
+ dfs .append (group - climatology .loc [key ])
87
+
88
+ result = pd .concat (dfs ).sort_index ()
89
+ assert obj .shape == result .shape
90
+ return result
91
+
54
92
55
93
class BcsdPrecipitation (BcsdBase ):
56
- """ Classic BCSD model for Precipitation
94
+ """Classic BCSD model for Precipitation
57
95
58
96
Parameters
59
97
----------
@@ -72,7 +110,7 @@ class BcsdPrecipitation(BcsdBase):
72
110
"""
73
111
74
112
def fit (self , X , y ):
75
- """ Fit BcsdPrecipitation model
113
+ """Fit BcsdPrecipitation model
76
114
77
115
Parameters
78
116
----------
@@ -88,16 +126,19 @@ def fit(self, X, y):
88
126
89
127
self ._pre_fit ()
90
128
X , y = self ._validate_data (X , y , y_numeric = True )
129
+ # TO-DO: set n_features_n attribute
91
130
if self .n_features_in_ != 1 :
92
131
raise ValueError (f'BCSD only supports 1 feature, found { self .n_features_in_ } ' )
93
132
94
- y_groups = y . groupby ( self .time_grouper )
133
+ y_groups = self ._create_groups ( y )
95
134
# calculate the climatologies
96
135
self .y_climo_ = y_groups .mean ()
136
+
97
137
if self .y_climo_ .values .min () <= 0 :
98
138
raise ValueError ('Invalid value in target climatology' )
99
139
100
140
# fit the quantile mappers
141
+ # TO-DO: do we need to detrend the data before fitting the quantile mappers??
101
142
self ._qm_fit_by_group (y_groups )
102
143
103
144
return self
@@ -119,23 +160,28 @@ def predict(self, X):
119
160
X = self ._validate_data (X )
120
161
121
162
# Bias correction
122
- # apply quantile mapping by month
123
- Xqm = self ._qm_transform_by_group (X . groupby ( self .time_grouper ))
163
+ # apply quantile mapping by month or day
164
+ Xqm = self ._qm_transform_by_group (self ._create_groups ( X , climate_trend = True ))
124
165
125
166
# calculate the anomalies as a ratio of the training data
126
167
if self .return_anoms :
127
168
return self ._calc_ratio_anoms (Xqm , self .y_climo_ )
128
169
else :
129
170
return Xqm
130
171
131
- def _calc_ratio_anoms (self , obj , climatology ):
172
+ def _calc_ratio_anoms (self , obj , climatology , climate_trend = False ):
173
+ """helper function for dividing day groups by climatology"""
132
174
dfs = []
133
- for key , group in obj .groupby (self .time_grouper ):
134
- dfs .append (group / climatology .loc [key ].values )
175
+ for key , group in self ._create_groups (obj , climate_trend ):
176
+ if self .timestep == 'monthly' :
177
+ dfs .append (group / climatology .loc [key ].values )
178
+ else :
179
+ dfs .append (group / climatology .loc [key ])
180
+
181
+ result = pd .concat (dfs ).sort_index ()
182
+ assert obj .shape == result .shape
135
183
136
- out = pd .concat (dfs ).sort_index ()
137
- assert obj .shape == out .shape
138
- return out
184
+ return result
139
185
140
186
def _more_tags (self ):
141
187
return {
@@ -162,7 +208,7 @@ def _more_tags(self):
162
208
163
209
class BcsdTemperature (BcsdBase ):
164
210
def fit (self , X , y ):
165
- """ Fit BcsdTemperature model
211
+ """Fit BcsdTemperature model
166
212
167
213
Parameters
168
214
----------
@@ -175,14 +221,18 @@ def fit(self, X, y):
175
221
-------
176
222
self : returns an instance of self.
177
223
"""
224
+
178
225
self ._pre_fit ()
179
226
X , y = self ._validate_data (X , y , y_numeric = True )
227
+ # TO-DO: set n_features_in attribute
180
228
if self .n_features_in_ != 1 :
181
- raise ValueError (f'BCSD only supports 1 feature, found { self .n_features_in_ } ' )
229
+ raise ValueError (f'BCSD only supports up to 4 features, found { self .n_features_in_ } ' )
230
+
231
+ # make groups for day or month
232
+ y_groups = self ._create_groups (y )
182
233
183
234
# calculate the climatologies
184
- self ._x_climo = X .groupby (self .time_grouper ).mean ()
185
- y_groups = y .groupby (self .time_grouper )
235
+ self ._x_climo = self ._create_groups (X ).mean ()
186
236
self .y_climo_ = y_groups .mean ()
187
237
188
238
# fit the quantile mappers
@@ -191,7 +241,7 @@ def fit(self, X, y):
191
241
return self
192
242
193
243
def predict (self , X ):
194
- """ Predict using the BcsdTemperature model
244
+ """Predict using the BcsdTemperature model
195
245
196
246
Parameters
197
247
----------
@@ -206,42 +256,44 @@ def predict(self, X):
206
256
check_is_fitted (self )
207
257
X = self ._check_array (X )
208
258
209
- # X = ensure_samples_features(X) # don't need????
210
-
211
259
# Calculate the 9-year running mean for each month
212
260
def rolling_func (x ):
213
261
return x .rolling (9 , center = True , min_periods = 1 ).mean ()
214
262
215
- X_rolling_mean = X .groupby (self .time_grouper , group_keys = False ).apply (rolling_func )
263
+ X_rolling_mean = X .groupby (self .climate_trend , group_keys = False ).apply (rolling_func )
216
264
217
- # calc shift
218
- # why isn't this working??
219
- # X_shift = X_rolling_mean.groupby(self.time_grouper) - self._x_climo
220
- X_shift = self ._remove_climatology (X_rolling_mean , self ._x_climo )
265
+ # remove climatology from 9-year monthly mean climate trend
266
+ X_shift = self ._remove_climatology (X_rolling_mean , self ._x_climo , climate_trend = True )
221
267
222
- # remove shift
268
+ # remove shift from model data
223
269
X_no_shift = X - X_shift
224
270
225
271
# Bias correction
226
- # apply quantile mapping by month
227
- Xqm = self ._qm_transform_by_group (X_no_shift . groupby ( self .time_grouper ))
272
+ # apply quantile mapping by month or day
273
+ Xqm = self ._qm_transform_by_group (self ._create_groups ( X_no_shift , climate_trend = True ))
228
274
229
- # restore the shift
275
+ # restore the climate trend
230
276
X_qm_with_shift = X_shift + Xqm
231
- # calculate the anomalies
277
+
278
+ # return bias corrected absolute values or calculate the anomalies
232
279
if self .return_anoms :
233
280
return self ._remove_climatology (X_qm_with_shift , self .y_climo_ )
234
281
else :
235
282
return X_qm_with_shift
236
283
237
- def _remove_climatology (self , obj , climatology ):
284
+ def _remove_climatology (self , obj , climatology , climate_trend = False ):
285
+ """helper function to remove climatologies"""
238
286
dfs = []
239
- for key , group in obj .groupby (self .time_grouper ):
240
- dfs .append (group - climatology .loc [key ].values )
241
-
242
- out = pd .concat (dfs ).sort_index ()
243
- assert obj .shape == out .shape
244
- return out
287
+ for key , group in self ._create_groups (obj , climate_trend ):
288
+ if self .timestep == 'monthly' :
289
+ dfs .append (group - climatology .loc [key ].values )
290
+ elif self .timestep == 'daily' :
291
+ dfs .append (group - climatology .loc [key ].values )
292
+
293
+ result = pd .concat (dfs ).sort_index ()
294
+ if obj .shape != result .shape :
295
+ raise ValueError ('shape of climo is not equal to input array' )
296
+ return result
245
297
246
298
def _more_tags (self ):
247
299
return {
0 commit comments