pangeo-data · jhamman · Nov 5, 2020 · Oct 29, 2019 · Oct 29, 2019 · Apr 2, 2020
diff --git a/skdownscale/pointwise_models/__init__.py b/skdownscale/pointwise_models/__init__.py
@@ -1,5 +1,6 @@
 from .bcsd import BcsdPrecipitation, BcsdTemperature
 from .core import PointWiseDownscaler
 from .gard import AnalogRegression, PureAnalog
+from .groupers import PaddedDOYGrouper
 from .utils import LinearTrendTransformer, QuantileMapper
 from .zscore import ZScoreRegressor
diff --git a/skdownscale/pointwise_models/bcsd.py b/skdownscale/pointwise_models/bcsd.py
@@ -7,28 +7,60 @@
 from sklearn.utils.validation import check_is_fitted
 
 from .base import AbstractDownscaler
-from .utils import QuantileMapper, ensure_samples_features
+from .groupers import PaddedDOYGrouper
+from .utils import QuantileMapper, check_datetime_index, ensure_samples_features
 
 
 def MONTH_GROUPER(x):
+    """Return ``x.month``; used as a key function for ``groupby`` calls."""
     return x.month
 
 
+def DAY_GROUPER(x):
+    """Return ``x.day``; default ``climate_trend_grouper`` key function.
+
+    NOTE(review): presumably ``x`` is a pandas timestamp, in which case
+    ``.day`` is the day of the month (1-31), whereas ``PaddedDOYGrouper``
+    yields day-of-year keys (1-365) -- confirm which one is intended when
+    the two sets of keys are used against the same climatology.
+    """
+    return x.day
+
+
 class BcsdBase(AbstractDownscaler):
     """ Base class for BCSD model.
     """
 
     _fit_attributes = ["y_climo_", "quantile_mappers_"]
 
-    def __init__(self, time_grouper=MONTH_GROUPER, return_anoms=True, **qm_kwargs):
+    def __init__(
+        self,
+        time_grouper=MONTH_GROUPER,
+        climate_trend_grouper=DAY_GROUPER,
+        return_anoms=True,
+        **qm_kwargs
+    ):
+        """Store the grouping configuration for the BCSD model.
+
+        Parameters
+        ----------
+        time_grouper : callable or str
+            Either a key function handed to ``groupby`` (the model is then
+            labelled ``timestep = "monthly"``), or the string
+            ``"daily_nasa-nex"`` which selects ``PaddedDOYGrouper`` and
+            ``timestep = "daily"``.
+        climate_trend_grouper : callable
+            Key function used by ``_create_groups`` when ``climate_trend``
+            is requested in daily mode.
+        return_anoms : bool
+            Stored as-is; read by the ``predict`` methods.
+        **qm_kwargs
+            Stored as-is; presumably forwarded to the quantile mappers.
+
+        Raises
+        ------
+        TypeError
+            If ``time_grouper`` is a string other than ``"daily_nasa-nex"``.
+        """
         if isinstance(time_grouper, str):
-            self.time_grouper = pd.Grouper(freq=time_grouper)
+            if time_grouper == "daily_nasa-nex":
+                self.time_grouper = PaddedDOYGrouper
+                self.timestep = "daily"
+            else:
+                raise TypeError("this functionality has not yet been implemented")
         else:
             self.time_grouper = time_grouper
+            self.timestep = "monthly"
 
+        # Store the constructor argument on every path (it used to be set
+        # only in the daily branch), so the attribute always exists and the
+        # argument is never silently dropped.
+        self.climate_trend_grouper = climate_trend_grouper
+        self.climate_trend = MONTH_GROUPER
         self.return_anoms = return_anoms
         self.qm_kwargs = qm_kwargs
 
+    def _create_groups(self, df, climate_trend=False):
+        """Build the groups of ``df`` that match the configured timestep.
+
+        In ``"monthly"`` mode ``df`` is grouped with ``self.time_grouper``.
+        In ``"daily"`` mode the result is ``self.time_grouper(df)``, unless
+        ``climate_trend`` is true, in which case ``df`` is grouped with
+        ``self.climate_trend_grouper`` instead.
+
+        Raises ``TypeError`` for any other ``self.timestep`` value.
+        """
+        step = self.timestep
+        if step not in ("monthly", "daily"):
+            raise TypeError("unexpected time grouper type %s" % self.time_grouper)
+
+        if step == "monthly":
+            return df.groupby(self.time_grouper)
+
+        if climate_trend:
+            # group by day only rather than also +/- offset days
+            return df.groupby(self.climate_trend_grouper)
+
+        return self.time_grouper(df)
+
     def _qm_fit_by_group(self, groups):
         """ helper function to fit quantile mappers by group
 
@@ -52,6 +84,21 @@ def _qm_transform_by_group(self, groups):
             dfs.append(pd.DataFrame(qmapped, index=group.index, columns=data.columns))
         return pd.concat(dfs).sort_index()
 
+    def _remove_climatology(self, obj, climatology, climate_trend=False):
+        """Subtract the per-group climatology from ``obj``.
+
+        ``obj`` is split with ``self._create_groups(obj, climate_trend)`` and
+        the ``climatology.loc[key]`` entry is subtracted from each group (in
+        monthly mode its raw ``.values`` are used). The pieces are then
+        concatenated and sorted by index.
+
+        NOTE(review): the shape assertion requires every row of ``obj`` to
+        land in exactly one group; groups that overlap would break it --
+        confirm callers pass ``climate_trend=True`` where needed.
+        """
+        is_monthly = self.timestep == "monthly"
+        is_daily = self.timestep == "daily"
+
+        pieces = []
+        for key, group in self._create_groups(obj, climate_trend):
+            if is_monthly:
+                pieces.append(group - climatology.loc[key].values)
+            elif is_daily:
+                pieces.append(group - climatology.loc[key])
+
+        result = pd.concat(pieces).sort_index()
+        assert obj.shape == result.shape
+        return result
+
 
 class BcsdPrecipitation(BcsdBase):
     """ Classic BCSD model for Precipitation
@@ -86,13 +133,16 @@ def fit(self, X, y):
         -------
         self : returns an instance of self.
         """
-        y_groups = y.groupby(self.time_grouper)
+        y_groups = self._create_groups(y)
+
         # calculate the climatologies
         self.y_climo_ = y_groups.mean()
+
         if self.y_climo_.values.min() <= 0:
             raise ValueError("Invalid value in target climatology")
 
         # fit the quantile mappers
+        # TO-DO: do we need to detrend the data before fitting the quantile mappers??
         self._qm_fit_by_group(y_groups)
 
         return self
@@ -114,23 +164,29 @@ def predict(self, X):
         X = ensure_samples_features(X)
 
         # Bias correction
-        # apply quantile mapping by month
-        Xqm = self._qm_transform_by_group(X.groupby(self.time_grouper))
+        # apply quantile mapping by month or day
+        Xqm = self._qm_transform_by_group(self._create_groups(X, climate_trend=True))
 
         # calculate the anomalies as a ratio of the training data
         if self.return_anoms:
             return self._calc_ratio_anoms(Xqm, self.y_climo_)
         else:
             return Xqm
 
-    def _calc_ratio_anoms(self, obj, climatology):
+    def _calc_ratio_anoms(self, obj, climatology, climate_trend=False):
+        """Divide each group of ``obj`` by its matching climatology entry.
+
+        Groups come from ``self._create_groups(obj, climate_trend)``; the
+        divided pieces are concatenated and sorted by index.
+
+        NOTE(review): the shape assertion below requires every row of
+        ``obj`` to land in exactly one group -- confirm the default
+        ``climate_trend=False`` is what daily-mode callers want.
+        """
         dfs = []
-        for key, group in obj.groupby(self.time_grouper):
-            dfs.append(group / climatology.loc[key].values)
+        for key, group in self._create_groups(obj, climate_trend):
+            if self.timestep == "monthly":
+                # monthly climatology rows are used as raw values
+                dfs.append(group / climatology.loc[key].values)
+            else:
+                dfs.append(group / climatology.loc[key])
 
-        out = pd.concat(dfs).sort_index()
-        assert obj.shape == out.shape
-        return out
+        result = pd.concat(dfs).sort_index()
+        # sanity check: the regrouped result must line up with the input
+        assert obj.shape == result.shape
+
+        return result
 
 
 class BcsdTemperature(BcsdBase):
@@ -148,9 +204,12 @@ def fit(self, X, y):
         -------
         self : returns an instance of self.
         """
+
+        # make groups for day or month
+        y_groups = self._create_groups(y)
+
         # calculate the climatologies
-        self._x_climo = X.groupby(self.time_grouper).mean()
-        y_groups = y.groupby(self.time_grouper)
+        self._x_climo = self._create_groups(X).mean()
         self.y_climo_ = y_groups.mean()
 
         # fit the quantile mappers
@@ -178,33 +237,23 @@ def predict(self, X):
         def rolling_func(x):
             return x.rolling(9, center=True, min_periods=1).mean()
 
-        X_rolling_mean = X.groupby(self.time_grouper).apply(rolling_func)
+        X_rolling_mean = X.groupby(self.climate_trend).apply(rolling_func)
 
-        # calc shift
-        # why isn't this working??
-        # X_shift = X_rolling_mean.groupby(self.time_grouper) - self._x_climo
-        X_shift = self._remove_climatology(X_rolling_mean, self._x_climo)
+        # remove climatology from 9-year monthly mean climate trend
+        X_shift = self._remove_climatology(X_rolling_mean, self._x_climo, climate_trend=True)
 
-        # remove shift
+        # remove shift from model data
         X_no_shift = X - X_shift
 
         # Bias correction
-        # apply quantile mapping by month
-        Xqm = self._qm_transform_by_group(X_no_shift.groupby(self.time_grouper))
+        # apply quantile mapping by month or day
+        Xqm = self._qm_transform_by_group(self._create_groups(X_no_shift, climate_trend=True))
 
-        # restore the shift
+        # restore the climate trend
         X_qm_with_shift = X_shift + Xqm
-        # calculate the anomalies
+
+        # return bias corrected absolute values or calculate the anomalies
         if self.return_anoms:
             return self._remove_climatology(X_qm_with_shift, self.y_climo_)
         else:
             return X_qm_with_shift
-
-    def _remove_climatology(self, obj, climatology):
-        dfs = []
-        for key, group in obj.groupby(self.time_grouper):
-            dfs.append(group - climatology.loc[key].values)
-
-        out = pd.concat(dfs).sort_index()
-        assert obj.shape == out.shape
-        return out
diff --git a/skdownscale/pointwise_models/groupers.py b/skdownscale/pointwise_models/groupers.py
@@ -0,0 +1,49 @@
+import numpy as np
+import pandas as pd
+
+
+class SkdownscaleGroupGeneratorBase:
+    """Common base class for scikit-downscale group generators."""
+
+    pass
+
+
+class PaddedDOYGrouper(SkdownscaleGroupGeneratorBase):
+    """Iterate over a frame in padded day-of-year groups.
+
+    For each day of year ``n`` in 1..365 the iterator yields ``(n, rows)``,
+    where ``rows`` holds every row of ``df`` whose index day of year lies
+    within ``offset`` days of ``n``, wrapping around the year boundary.
+
+    Parameters
+    ----------
+    df : pandas.DataFrame
+        Data whose index exposes ``dayofyear``.
+    offset : int, default 15
+        Number of days included on each side of the central day.
+
+    Raises
+    ------
+    ValueError
+        If ``offset`` is negative or the window would exceed 365 days.
+
+    NOTE(review): rows with day of year 366 never match a window because
+    the windows only contain 1..365 -- confirm inputs are leap-day free.
+    """
+
+    def __init__(self, df, offset=15):
+        if offset < 0 or (2 * offset) + 1 > 365:
+            raise ValueError("offset must satisfy 0 <= offset <= 182, got %r" % (offset,))
+
+        self.df = df
+        self.offset = offset
+        self.max = 365
+        self.days_of_year = np.arange(1, 366)
+        # Pad by ``offset`` (this used to be a hard-coded 15) so the window
+        # slice in ``__next__`` is correct for every offset, not only 15.
+        self.days_of_year_wrapped = np.pad(self.days_of_year, self.offset, mode="wrap")
+        self.n = 1
+
+    def __iter__(self):
+        """Restart the iteration at day of year 1 and return ``self``."""
+        self.n = 1
+        return self
+
+    def __next__(self):
+        """Return ``(day_of_year, rows)`` for the next day of year."""
+        # n as day of year
+        if self.n > self.max:
+            raise StopIteration
+
+        day = self.n
+        total_days = (2 * self.offset) + 1
+
+        # Position ``day - 1`` of the wrapped array holds ``day - offset``,
+        # so one contiguous slice covers day - offset .. day + offset.
+        start = day - 1
+        all_days = self.days_of_year_wrapped[start : start + total_days]
+
+        assert len(set(all_days)) == total_days, all_days
+
+        result = self.df[self.df.index.dayofyear.isin(all_days)]
+
+        self.n += 1
+
+        return day, result
+
+    def mean(self):
+        """Return a Series, indexed by day of year, of per-group means.
+
+        Only the first value of each group's ``mean()`` is kept.
+        """
+        group_means = [group.mean().values[0] for _, group in self]
+        return pd.Series(group_means, index=self.days_of_year)
diff --git a/skdownscale/pointwise_models/utils.py b/skdownscale/pointwise_models/utils.py
@@ -166,3 +166,17 @@ def ensure_samples_features(obj):
         if obj.ndim == 1:
             return obj.reshape(-1, 1)
     return obj  # hope for the best, probably better to raise an error here
+
+
+def check_datetime_index(obj, timestep):
+    """Truncate a DataFrame's datetime index to the given timestep.
+
+    Parameters
+    ----------
+    obj : object
+        If a ``pandas.DataFrame``, its index is replaced **in place** with
+        the index values cast to day (``"daily"``) or month (``"monthly"``)
+        resolution. Any other object is returned unchanged.
+    timestep : str
+        ``"daily"`` or ``"monthly"``.
+
+    Returns
+    -------
+    The same ``obj`` that was passed in.
+
+    Raises
+    ------
+    ValueError
+        If ``obj`` is a DataFrame and ``timestep`` is not supported.
+    """
+    if not isinstance(obj, pd.DataFrame):
+        # Previously this fell through and returned ``None`` implicitly;
+        # hand the object back untouched so callers can rebind the result.
+        return obj
+
+    if timestep == "daily":
+        resolution = "datetime64[D]"
+    elif timestep == "monthly":
+        resolution = "datetime64[M]"
+    else:
+        raise ValueError("this frequency has not yet been implemented in scikit-downscale")
+
+    obj.index = obj.index.values.astype(resolution)
+    return obj