pangeo-data · jhamman · Nov 5, 2020 · Oct 29, 2019 · Oct 29, 2019 · Apr 2, 2020
diff --git a/skdownscale/pointwise_models/__init__.py b/skdownscale/pointwise_models/__init__.py
@@ -1,5 +1,6 @@
 from .bcsd import BcsdPrecipitation, BcsdTemperature
 from .core import PointWiseDownscaler
 from .gard import AnalogRegression, PureAnalog
+from .groupers import DAY_GROUPER, MONTH_GROUPER, PaddedDOYGrouper
 from .utils import LinearTrendTransformer, QuantileMapper
 from .zscore import ZScoreRegressor
diff --git a/skdownscale/pointwise_models/bcsd.py b/skdownscale/pointwise_models/bcsd.py
@@ -5,31 +5,56 @@
 from sklearn.utils.validation import check_is_fitted
 
 from .base import TimeSynchronousDownscaler
-from .utils import QuantileMapper
-
-
-def MONTH_GROUPER(x):
-    return x.month
+from .groupers import DAY_GROUPER, MONTH_GROUPER, PaddedDOYGrouper
+from .utils import QuantileMapper, check_datetime_index, ensure_samples_features
 
 
 class BcsdBase(TimeSynchronousDownscaler):
     """ Base class for BCSD model.
     """
 
-    _fit_attributes = ['y_climo_', 'quantile_mappers_']
+    _fit_attributes = ["y_climo_", "quantile_mappers_"]
     _timestep = 'M'
 
-    def __init__(self, time_grouper=MONTH_GROUPER, return_anoms=True, qm_kwargs={}):
+    def __init__(
+        self,
+        time_grouper=MONTH_GROUPER,
+        climate_trend_grouper=DAY_GROUPER,
+        return_anoms=True,
+        **qm_kwargs):
+        # NOTE(review): qm_kwargs is now gathered from **kwargs, not passed as one dict -- confirm callers
         self.time_grouper = time_grouper
+        self.climate_trend_grouper = climate_trend_grouper
+        self.climate_trend = MONTH_GROUPER  # not a constructor argument; read in predict for the rolling mean
         self.return_anoms = return_anoms
         self.qm_kwargs = qm_kwargs
 
     def _pre_fit(self):
         if isinstance(self.time_grouper, str):
-            self.time_grouper_ = pd.Grouper(freq=self.time_grouper)
+            if self.time_grouper == "daily_nasa-nex":
+                # self.climate_trend_grouper keeps the value stored by __init__
+                self.time_grouper = PaddedDOYGrouper  # called as time_grouper(df) by _create_groups
+                self.timestep = "daily"
+            else:
+                self.time_grouper_ = pd.Grouper(freq=self.time_grouper)
         else:
             self.time_grouper_ = self.time_grouper
+            self.timestep = "monthly"
 
+    def _create_groups(self, df, climate_trend=False):
+        """Group ``df`` by month or by day, depending on ``self.timestep``.
+
+        With daily data, ``climate_trend=True`` groups through
+        ``self.climate_trend_grouper`` instead of calling ``self.time_grouper(df)``.
+        """
+        if self.timestep == "monthly":
+            return df.groupby(self.time_grouper)
+        if self.timestep != "daily":
+            raise TypeError("unexpected time grouper type %s" % self.time_grouper)
+        if climate_trend:  # day-only groups, without the +/- offset days
+            return df.groupby(self.climate_trend_grouper)
+        return self.time_grouper(df)
+
     def _qm_fit_by_group(self, groups):
         """ helper function to fit quantile mappers by group
 
@@ -51,6 +76,21 @@ def _qm_transform_by_group(self, groups):
             dfs.append(pd.DataFrame(qmapped, index=group.index, columns=group.columns))
         return pd.concat(dfs).sort_index()
 
+    def _remove_climatology(self, obj, climatology, climate_trend=False):
+        """Subtract from each group of ``obj`` its ``climatology.loc[key]`` entry
+        (as raw ``.values`` for monthly data) and return the re-sorted result.
+        """
+        anomalies = []
+        for key, group in self._create_groups(obj, climate_trend):
+            baseline = climatology.loc[key]
+            if self.timestep == "monthly":
+                anomalies.append(group - baseline.values)
+            elif self.timestep == "daily":
+                anomalies.append(group - baseline)
+        out = pd.concat(anomalies).sort_index()
+        assert obj.shape == out.shape
+        return out
+
 
 class BcsdPrecipitation(BcsdBase):
     """ Classic BCSD model for Precipitation
@@ -91,13 +131,15 @@ def fit(self, X, y):
         if self.n_features_in_ != 1:
             raise ValueError(f'BCSD only supports 1 feature, found {self.n_features_in_}')
 
-        y_groups = y.groupby(self.time_grouper)
+        y_groups = self._create_groups(y)
         # calculate the climatologies
         self.y_climo_ = y_groups.mean()
+
         if self.y_climo_.values.min() <= 0:
             raise ValueError('Invalid value in target climatology')
 
         # fit the quantile mappers
+        # TODO: do we need to detrend the data before fitting the quantile mappers?
         self._qm_fit_by_group(y_groups)
 
         return self
@@ -119,23 +161,29 @@ def predict(self, X):
         X = self._validate_data(X)
 
         # Bias correction
-        # apply quantile mapping by month
-        Xqm = self._qm_transform_by_group(X.groupby(self.time_grouper))
+        # apply quantile mapping by month or day
+        Xqm = self._qm_transform_by_group(self._create_groups(X, climate_trend=True))
 
         # calculate the anomalies as a ratio of the training data
         if self.return_anoms:
             return self._calc_ratio_anoms(Xqm, self.y_climo_)
         else:
             return Xqm
 
-    def _calc_ratio_anoms(self, obj, climatology):
+    def _calc_ratio_anoms(self, obj, climatology, climate_trend=False):
+        """Divide each group of ``obj`` (monthly or daily, per ``self.timestep``)
+        by its ``climatology.loc[key]`` entry; return the re-sorted result."""
         dfs = []
-        for key, group in obj.groupby(self.time_grouper):
-            dfs.append(group / climatology.loc[key].values)
+        for key, group in self._create_groups(obj, climate_trend):
+            if self.timestep == "monthly":
+                dfs.append(group / climatology.loc[key].values)
+            else:
+                dfs.append(group / climatology.loc[key])
 
-        out = pd.concat(dfs).sort_index()
-        assert obj.shape == out.shape
-        return out
+        result = pd.concat(dfs).sort_index()
+        assert obj.shape == result.shape  # the regrouped output must keep the input's shape
+
+        return result
 
     def _more_tags(self):
         return {
@@ -175,14 +223,17 @@ def fit(self, X, y):
         -------
         self : returns an instance of self.
         """
+
         self._pre_fit()
         X, y = self._validate_data(X, y, y_numeric=True)
         if self.n_features_in_ != 1:
             raise ValueError(f'BCSD only supports 1 feature, found {self.n_features_in_}')
 
+        # make groups for day or month
+        y_groups = self._create_groups(y)
+
         # calculate the climatologies
-        self._x_climo = X.groupby(self.time_grouper).mean()
-        y_groups = y.groupby(self.time_grouper)
+        self._x_climo = self._create_groups(X).mean()
         self.y_climo_ = y_groups.mean()
 
         # fit the quantile mappers
@@ -206,29 +257,26 @@ def predict(self, X):
         check_is_fitted(self)
         X = self._check_array(X)
 
-        # X = ensure_samples_features(X)  # don't need????
-
         # Calculate the 9-year running mean for each month
         def rolling_func(x):
             return x.rolling(9, center=True, min_periods=1).mean()
 
-        X_rolling_mean = X.groupby(self.time_grouper, group_keys=False).apply(rolling_func)
+        X_rolling_mean = X.groupby(self.climate_trend, group_keys=False).apply(rolling_func)
 
-        # calc shift
-        # why isn't this working??
-        # X_shift = X_rolling_mean.groupby(self.time_grouper) - self._x_climo
-        X_shift = self._remove_climatology(X_rolling_mean, self._x_climo)
+        # remove climatology from 9-year monthly mean climate trend
+        X_shift = self._remove_climatology(X_rolling_mean, self._x_climo, climate_trend=True)
 
-        # remove shift
+        # remove shift from model data
         X_no_shift = X - X_shift
 
         # Bias correction
-        # apply quantile mapping by month
-        Xqm = self._qm_transform_by_group(X_no_shift.groupby(self.time_grouper))
+        # apply quantile mapping by month or day
+        Xqm = self._qm_transform_by_group(self._create_groups(X_no_shift, climate_trend=True))
 
-        # restore the shift
+        # restore the climate trend
         X_qm_with_shift = X_shift + Xqm
-        # calculate the anomalies
+
+        # return bias corrected absolute values or calculate the anomalies
         if self.return_anoms:
             return self._remove_climatology(X_qm_with_shift, self.y_climo_)
         else:

diff --git a/skdownscale/pointwise_models/groupers.py b/skdownscale/pointwise_models/groupers.py
@@ -0,0 +1,55 @@
+import numpy as np
+import pandas as pd
+
+
+class SkdownscaleGroupGeneratorBase:
+    """Marker base class for skdownscale group generators (adds no behaviour)."""
+
+
+def MONTH_GROUPER(x):
+    return x.month  # month of ``x``; presumably called by groupby on datetime index values
+
+def DAY_GROUPER(x):
+    return x.day  # NOTE(review): presumably day-of-month, not day-of-year -- confirm intent
+
+class PaddedDOYGrouper(SkdownscaleGroupGeneratorBase):
+    """Iterate over day-of-year groups padded by +/- ``offset`` days.
+    Only days 1-365 are used; rows on day 366 (leap years) are never selected.
+    """
+
+    def __init__(self, df, offset=15):
+        self.df = df
+        self.offset = offset
+        self.max = 365
+        self.days_of_year = np.arange(1, 366)
+        # pad by ``offset`` (not a fixed 15) so the window slices in __next__
+        # line up for any offset; "wrap" joins the end of the year to its start
+        self.days_of_year_wrapped = np.pad(self.days_of_year, offset, mode="wrap")
+        self.n = 1
+
+    def __iter__(self):
+        self.n = 1
+        return self
+
+    def __next__(self):
+        """Return ``(day_of_year, rows of df within +/- offset days of it)``."""
+        if self.n > self.max:
+            raise StopIteration
+
+        i = self.n - 1
+        total_days = (2 * self.offset) + 1
+        # wrapped[i] is day n - offset, so these slices are the ``offset`` days
+        # before and after day n
+        first_half = self.days_of_year_wrapped[i : i + self.offset]
+        sec_half = self.days_of_year_wrapped[self.n + self.offset : i + total_days]
+        all_days = np.concatenate((first_half, np.array([self.n]), sec_half), axis=0)
+        assert len(set(all_days)) == total_days, all_days
+
+        result = self.df[self.df.index.dayofyear.isin(all_days)]
+        self.n += 1
+        return self.n - 1, result
+
+    def mean(self):
+        """Return a Series, indexed by day of year, of each group's first-column mean."""
+        means = [group.mean().values[0] for _, group in self]
+        return pd.Series(means, index=self.days_of_year)
diff --git a/skdownscale/pointwise_models/utils.py b/skdownscale/pointwise_models/utils.py
@@ -168,3 +168,31 @@ def transform(self, X):
 
     def _more_tags(self):
         return {'_xfail_checks': {'check_methods_subset_invariance': 'because'}}
+
+def ensure_samples_features(obj):
+    """Coerce ``obj`` towards sklearn's 2-D (samples, features) layout.
+
+    A Series becomes a one-column DataFrame and a 1-D ndarray a single-column
+    2-D array; every other input is returned unchanged.
+    """
+    if isinstance(obj, pd.Series):
+        return obj.to_frame()
+    if isinstance(obj, np.ndarray) and obj.ndim == 1:
+        return obj.reshape(-1, 1)
+    # DataFrames and 2-D arrays already conform. Anything else is passed
+    # through as-is as well (hope for the best); raising an error for
+    # unsupported types would probably be better.
+    return obj
+
+def check_datetime_index(obj, timestep):
+    """Cast a DataFrame's index, in place, to day or month resolution.
+
+    Returns ``obj``; returns ``None`` if ``obj`` is not a DataFrame.
+    """
+    if not isinstance(obj, pd.DataFrame):
+        return None
+    if timestep not in ("daily", "monthly"):
+        raise ValueError("this frequency has not yet been implemented in scikit-downscale")
+    unit = "D" if timestep == "daily" else "M"
+    obj.index = obj.index.values.astype(f"datetime64[{unit}]")
+    return obj