|
| 1 | +# data elaboration functions |
| 2 | +import numpy as np |
| 3 | +import pandas as pd |
| 4 | +import holidays as h |
| 5 | +from functools import reduce |
| 6 | + |
| 7 | +# datetime functions |
| 8 | +import dateutil |
| 9 | +import datetime |
| 10 | +from dateutil.relativedelta import relativedelta |
| 11 | + |
| 12 | +# custom functions |
| 13 | +from Code.Regressors.regressors import Regressors |
| 14 | +from Code.Utils.utils import AlphabeticalCombinations, Utils |
| 15 | + |
class SimilarDay:
    """Map dates onto a comparable ("similar") date one year or one week earlier.

    All methods are static: the class is only a namespace and holds no state.
    """

    @staticmethod
    def _build_holiday_calendar(country):
        """
        Instantiate the calendar class of the ``holidays`` package named after ``country``.
        :param country: country name as string; it is capitalized before the lookup (e.g. "italy" -> "Italy")
        :return: an instance of the matching calendar class
        :raises AttributeError: if the ``holidays`` module exposes no such name
        """
        # A plain attribute lookup replaces the former eval() on a string built
        # from the argument, so the argument can no longer be executed as code.
        return getattr(h, country.capitalize())()

    @staticmethod
    def _align_to_similar_day(d, shifted, holiday_calendar):
        """
        Turn the naively shifted date into the similar day of ``d``.
        :param d: the original date
        :param shifted: ``d`` moved back by the reference period (one year or one week)
        :param holiday_calendar: any object whose ``get(date)`` returns a truthy value for holidays
        :return: the similar day
        """
        # When the original date is a holiday no adjustment is applied: the
        # shifted date is returned as it is, even if the weekday differs.
        if holiday_calendar.get(d):
            return shifted

        # Move forward to the first date on or after `shifted` that has the
        # same weekday as `d` (offset is always in the range 0..6).
        offset = (d.weekday() - shifted.weekday()) % 7
        candidate = shifted + datetime.timedelta(days=offset)

        # Skip holidays by stepping back whole weeks, which keeps the weekday.
        while holiday_calendar.get(candidate):
            candidate = candidate - datetime.timedelta(days=7)
        return candidate

    @staticmethod
    def get_similar_days_in_previous_year(dates, country):
        """
        Retrieves, for each given date, its similar day in the previous year.
        :param dates: a list-like object of dates (parsed with pandas.to_datetime)
        :param country: country name as string, used to pick the holiday calendar
        :return: a Pandas series of similar days
        """
        d = pd.to_datetime(pd.Series(dates))
        holidays = SimilarDay._build_holiday_calendar(country)
        return d.apply(lambda x: SimilarDay.get_similar_day_in_previous_year(x, holidays))

    @staticmethod
    def get_similar_days_in_previous_week(dates, country):
        """
        Retrieves, for each given date, its similar day in the previous week.
        :param dates: a list-like object of dates (parsed with pandas.to_datetime)
        :param country: country name as string, used to pick the holiday calendar
        :return: a Pandas series of similar days
        """
        d = pd.to_datetime(pd.Series(dates))
        holidays = SimilarDay._build_holiday_calendar(country)
        return d.apply(lambda x: SimilarDay.get_similar_day_in_previous_week(x, holidays))

    @staticmethod
    def get_similar_day_in_previous_year(d, holiday_calendar):
        """
        Retrieves the similar day of a given date in the previous year.
        If the given date is not a holiday, the similar day is the first date on or after
        "same date, one year earlier" which shares the weekday with the given date. If such a date
        is a holiday, the same weekday of the week before is considered, repeatedly.
        If the given date is a holiday, the same calendar date of the previous year is returned unchanged.
        :param d: a date
        :param holiday_calendar: a calendar from holidays package (any object with a dict-like ``get``)
        :return: the similar day, or None when ``d`` is falsy or missing (None, NaT, NaN)
        """
        if not d or pd.isna(d):
            return None

        one_year_before = d - relativedelta(years=1)
        return SimilarDay._align_to_similar_day(d, one_year_before, holiday_calendar)

    @staticmethod
    def get_similar_day_in_previous_week(d, holiday_calendar):
        """
        Retrieves the similar day of a given date in the previous week.
        If the given date is not a holiday, the similar day is the same weekday of the week before.
        If such a date is a holiday, the same weekday of one more week before is considered, repeatedly.
        If the given date is a holiday, the date exactly seven days earlier is returned unchanged.
        :param d: a date
        :param holiday_calendar: a calendar from holidays package (any object with a dict-like ``get``)
        :return: the similar day, or None when ``d`` is falsy or missing (None, NaT, NaN)
        """
        if not d or pd.isna(d):
            return None

        one_week_before = d - relativedelta(weeks=1)
        return SimilarDay._align_to_similar_day(d, one_week_before, holiday_calendar)
| 95 | + |
class StandardConsumption:
    """Add reference consumption columns (mean or minimum) split by day type.

    All methods are static: the class is only a namespace and holds no state.
    """

    # Indicator columns read to classify rows; presumably created by
    # Regressors.add_weekdays -- verify against that helper.
    _WEEKDAY_FLAGS = ('wd_mon', 'wd_tue', 'wd_wed', 'wd_thu', 'wd_fri')
    _WEEKEND_FLAGS = ('wd_sat', 'wd_sun')

    @staticmethod
    def _add_calendar_columns(df, date_var, country):
        """
        Adds holiday/weekday regressors and the 'day', 'hour', 'month' columns, then detects the sampling frequency.
        :params: dataframe, date_var as string, country as string
        :return: tuple (dataframe, freq_var) where freq_var is 'day' for daily data and 'hour' otherwise
        """
        df = Regressors.add_holidays_by_country(df, date_var, country)
        df = Regressors.add_weekdays(df, date_var)
        df.loc[:, 'day'] = df.loc[:, date_var].dt.day
        df.loc[:, 'hour'] = df.loc[:, date_var].dt.hour
        df.loc[:, 'month'] = df.loc[:, date_var].dt.month

        # The frequency is derived from the mean absolute gap between consecutive rows.
        timedelta = Utils.delta_format(abs(np.diff(df[date_var])).mean())
        freq = Utils.find_freq(timedelta)

        freq_var = 'day' if freq == 'D' else 'hour'
        return df, freq_var

    @staticmethod
    def _add_aggregates_by_day_type(df, keys, var, aggfunc, label):
        """
        Aggregates var per group separately for holidays, weekdays and weekends and merges the results into df.
        :param df: dataframe holding var, the key columns, 'holidays' and the wd_* indicator columns
        :param keys: list of column names to group by and to merge on
        :param var: name of the column to aggregate
        :param aggfunc: aggregation name accepted by pandas.pivot_table (e.g. 'mean', 'min')
        :param label: infix of the new columns, named var + '_' + label + '_' + day type
        :return: df with the three new columns left-joined on keys
        """
        has_value = df[var].notna()
        is_holiday = df['holidays'] == 1
        not_holiday = df['holidays'] == 0
        on_weekday = df[list(StandardConsumption._WEEKDAY_FLAGS)].eq(1).any(axis=1)
        on_weekend = df[list(StandardConsumption._WEEKEND_FLAGS)].eq(1).any(axis=1)

        # Order matters only for the column order of the merged result.
        selections = [
            ('holidays', has_value & is_holiday),
            ('weekdays', has_value & on_weekday & not_holiday),
            ('weekend', has_value & on_weekend & not_holiday),
        ]

        tables = []
        for day_type, mask in selections:
            new_var = var + '_' + label + '_' + day_type
            table = pd.pivot_table(df.loc[mask], index=keys, values=var, aggfunc=aggfunc).reset_index()
            table = table.rename(columns={var: new_var})
            # Negative aggregates are replaced by zero.
            table[new_var] = table[new_var].clip(lower=0)
            tables.append(table)

        # Merging
        merged = reduce(lambda left, right: pd.merge(left, right, how='outer', on=keys, validate='1:1'), tables)
        return pd.merge(df, merged, how='left', on=keys, validate='m:1')

    @staticmethod
    def get_standard_consumption_as_mean(df, id, date_var, var, country):
        """
        Adds the standard consumption, computed as the mean of var per id, month and hour (or day of month
        for daily data), differentiated by holiday, weekend, weekdays.
        :params: dataframe, id as string (identifier column), date_var as string, var as string, country as string
        :return: the dataframe with the additional columns var + '_std_holidays', var + '_std_weekdays'
                 and var + '_std_weekend' (plus the calendar columns used to compute them)
        """
        df, freq_var = StandardConsumption._add_calendar_columns(df, date_var, country)
        keys = [id, 'month', freq_var]
        return StandardConsumption._add_aggregates_by_day_type(df, keys, var, 'mean', 'std')

    @staticmethod
    def get_minimum_consumption(df, date_var, var, country, id=None):
        """
        Adds the minimum consumption, computed as the minimum of var per month and hour (or day of month
        for daily data), differentiated by holiday, weekend, weekdays.
        :params: dataframe, date_var as string, var as string, country as string
        :param id: optional identifier column to additionally group by. It defaults to None (no identifier):
                   the parameter was missing before, so the grouping silently used the builtin ``id`` function.
        :return: the dataframe with the additional columns var + '_min_holidays', var + '_min_weekdays'
                 and var + '_min_weekend' (plus the calendar columns used to compute them)
        """
        df, freq_var = StandardConsumption._add_calendar_columns(df, date_var, country)
        keys = ['month', freq_var] if id is None else [id, 'month', freq_var]
        return StandardConsumption._add_aggregates_by_day_type(df, keys, var, 'min', 'min')
| 191 | + |
| 192 | + |
0 commit comments