
Commit 8b7e05d

forecast in the future
1 parent 1c0b6d5 commit 8b7e05d

21 files changed: +2447 −1286 lines

Diff for: Code/Plotting/__pycache__/plots.cpython-37.pyc

321 Bytes
Binary file not shown.

Diff for: Code/Plotting/plots.py

+38 −9

@@ -14,8 +14,6 @@
 from matplotlib import pyplot as plt
 import matplotlib.dates as mdates
 import plotly.graph_objects as go
-from plotly.subplots import make_subplots
-from adjustText import adjust_text
 
 # custom functions
 from Configuration.config import cfg_path
@@ -140,17 +138,14 @@ def sliding_fcst_plot(df, predict_col, expected_values, chart_title="", kpi=True
 
     # Add annotations
     for col in ['train_start_date', 'train_end_date', 'test_start_date', 'test_end_date']:
-        if col in list(df.columns):
+        if col in list(df.columns) and col in ['train_end_date', 'test_end_date']:
             col_date = pd.to_datetime(str(df[col].unique()[0])).strftime('%Y-%m-%d')
-            date_value = df[col].unique()[0]
-            unique_index = pd.Index(list(df[date].unique()))
-            closest_date = df.loc[unique_index.get_loc(date_value, method='nearest'), date]
+            closest_date = df[col].unique()[0]
             x_value = pd.to_datetime(df.loc[df[date]==closest_date, date].reset_index(drop=True)[0], format='%Y-%m-%d')
             y_value = pd.to_numeric(df.loc[df[date]==closest_date, y].reset_index(drop=True)[0])
             fig.add_annotation(
                 x=x_value,
-                y=y_value,
-                #textangle=45,
+                y=y_value,
                 text= col + ': ' + str(col_date),
                 showarrow=True,
                 arrowhead=1,
@@ -159,7 +154,41 @@ def sliding_fcst_plot(df, predict_col, expected_values, chart_title="", kpi=True
                 font = dict(
                     color="black",
                     size=16
-                ))
+                ))
+        elif col in list(df.columns) and col in ['train_start_date']:
+            col_date = pd.to_datetime(str(df[col].unique()[0])).strftime('%Y-%m-%d')
+            closest_date = df[col].unique()[0]
+            x_value = pd.to_datetime(df.loc[df[date]==closest_date, date].reset_index(drop=True)[0], format='%Y-%m-%d')
+            y_value = pd.to_numeric(df.loc[df[date]==closest_date, y].reset_index(drop=True)[0])
+            fig.add_annotation(
+                x=x_value,
+                y=y_value*2,
+                text= col + ': ' + str(col_date),
+                showarrow=True,
+                arrowhead=1,
+                arrowsize=1,
+                arrowwidth=2,
+                font = dict(
+                    color="black",
+                    size=16
+                ))
+        elif col in list(df.columns) and col in ['test_start_date']:
+            col_date = pd.to_datetime(str(df[col].unique()[0])).strftime('%Y-%m-%d')
+            closest_date = df[col].unique()[0]
+            x_value = pd.to_datetime(df.loc[df[date]==closest_date, date].reset_index(drop=True)[0], format='%Y-%m-%d')
+            y_value = pd.to_numeric(df.loc[df[date]==closest_date, y].reset_index(drop=True)[0])
+            fig.add_annotation(
+                x=x_value,
+                y=y_value*1.5,
+                text= col + ': ' + str(col_date),
+                showarrow=True,
+                arrowhead=1,
+                arrowsize=1,
+                arrowwidth=2,
+                font = dict(
+                    color="black",
+                    size=16
+                ))
         else:
             print('No annotation available for', col)
 
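Note: the three new branches differ only in the vertical offset applied to the annotation (y_value for the end dates, y_value*2 for train_start_date, y_value*1.5 for test_start_date), which keeps the labels from overlapping. A minimal sketch of the same behaviour with the offset factored into a lookup, assuming the df, date, y and fig variables already defined inside sliding_fcst_plot (a hypothetical refactor for illustration, not what the commit does):

    # Hypothetical consolidation of the branches above; the multipliers reproduce
    # the vertical offsets used in the commit.
    y_offsets = {'train_end_date': 1, 'test_end_date': 1,
                 'train_start_date': 2, 'test_start_date': 1.5}
    for col, factor in y_offsets.items():
        if col not in df.columns:
            print('No annotation available for', col)
            continue
        col_date = pd.to_datetime(str(df[col].unique()[0])).strftime('%Y-%m-%d')
        closest_date = df[col].unique()[0]
        x_value = pd.to_datetime(df.loc[df[date] == closest_date, date].reset_index(drop=True)[0])
        y_value = pd.to_numeric(df.loc[df[date] == closest_date, y].reset_index(drop=True)[0])
        fig.add_annotation(x=x_value, y=y_value * factor,
                           text=col + ': ' + col_date,
                           showarrow=True, arrowhead=1, arrowsize=1, arrowwidth=2,
                           font=dict(color='black', size=16))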

0 Bytes
Binary file not shown.

Diff for: Code/Regressors/regressors.py

+8 −4

@@ -92,7 +92,8 @@ def add_months(df, date_var):
     return df
 
 def calculate_degree_days(df, base_temperature, temperature):
-    """Calculate the Degree Days Heating and Cooling values
+    """
+    Calculate the Degree Days Heating and Cooling values
     :params: dataframe, base temperature to start and actual temperature as string
     :return: a pandas dataframe
     """
@@ -102,7 +103,8 @@ def calculate_degree_days(df, base_temperature, temperature):
     return df
 
 def merge_holidays_by_date(df, df_holidays, id):
-    """Merge Holiday df with the train df
+    """
+    Merge Holiday df with the train df
     :params: df as dataframe, df_holidays as df containing info on holidays, id as string
     :return: a pandas dataframe
     """
@@ -124,7 +126,8 @@ def merge_holidays_by_date(df, df_holidays, id):
     return df
 
 def merge_additional_days_off(df, df_metadata, id, dict_days_off):
-    """Merge Site Weekend data with train df
+    """
+    Merge Site Weekend data with train df
     :params: df as dataframe, df_metadata as df containing additional info, id as string, dict_days_off as dictionary
     :return: a pandas dataframe
     """
@@ -142,7 +145,8 @@ def merge_additional_days_off(df, df_metadata, id, dict_days_off):
     return df
 
 def merge_weather(df, weather, date_var, id):
-    """Merge weather data into the train df
+    """
+    Merge weather data into the train df
     :params: df as dataframe, weather as dataframe with weather info, date_var as string, id as string
     :return: a pandas dataframe
     """
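Only the docstrings change above; the body of calculate_degree_days is not shown in this diff. For context, heating and cooling degree days are conventionally the positive part of the gap between a base temperature and the observed temperature. A minimal sketch under the assumption that base_temperature is a numeric threshold and temperature names a column of df (both assumptions, as are the output column names):

    import pandas as pd

    def degree_days_sketch(df: pd.DataFrame, base_temperature: float, temperature: str) -> pd.DataFrame:
        # Illustrative output column names, not taken from the repository.
        # Heating degree days: positive part of (base - observed); zero on warm days.
        df['degree_days_heating'] = (base_temperature - df[temperature]).clip(lower=0)
        # Cooling degree days: positive part of (observed - base); zero on cool days.
        df['degree_days_cooling'] = (df[temperature] - base_temperature).clip(lower=0)
        return df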

Diff for: Code/Regressors/similar_day.py

+192 (new file)

@@ -0,0 +1,192 @@
# data elaboration functions
import numpy as np
import pandas as pd
import holidays as h
from functools import reduce

# datetime functions
import dateutil
import datetime
from dateutil.relativedelta import relativedelta

# custom functions
from Code.Regressors.regressors import Regressors
from Code.Utils.utils import AlphabeticalCombinations, Utils

class SimilarDay:
    def get_similar_days_in_previous_year(dates, country):
        """
        Retrieves the similar day for a given date.
        :param dates: a list-like object of dates, country as string
        :return: a Pandas series of similar days
        """
        d = pd.to_datetime(pd.Series(dates))
        holidays = eval("h." + country.capitalize() + "()")
        return d.apply(lambda x: SimilarDay.get_similar_day_in_previous_year(x, holidays))

    def get_similar_days_in_previous_week(dates, country):
        """
        Retrieves the similar day for a given date.
        :param dates: a list-like object of dates, country as string
        :return: a Pandas series of similar days
        """
        d = pd.to_datetime(pd.Series(dates))
        holidays = eval("h." + country.capitalize() + "()")
        return d.apply(lambda x: SimilarDay.get_similar_day_in_previous_week(x, holidays))


    def get_similar_day_in_previous_year(d, holiday_calendar):
        """
        Retrieves the similar day for a given date. If the given date is not an holiday, the similar day is the
        closest day of the previous year in terms of calendar position which shares the weekday. If such a date is an holiday,
        the same weekday of the week before is considered.
        If the given date is an holiday, its similar day is the closest holiday to the given date in the previous year.
        :param d: a date
        :param holiday_calendar: a calendar from holidays package
        :return: the similar day
        """
        if not d or pd.isna(d):
            return None

        new_date = d - relativedelta(years=1)
        holiday = holiday_calendar.get(d)
        diff = d.weekday() - new_date.weekday() if d.weekday() >= new_date.weekday() \
            else d.weekday() - new_date.weekday() + 7

        if not holiday:
            new_date = new_date + datetime.timedelta(days=diff)
            while holiday_calendar.get(new_date):
                new_date = new_date - datetime.timedelta(days=7)
        # elif holiday == 'Pasqua di Resurrezione':
        #     new_date = dateutil.easter.easter(new_date.year)
        # elif holiday == "Lunedì dell'Angelo":
        #     new_date = dateutil.easter.easter(new_date.year) + datetime.timedelta(days=1)

        return new_date

    def get_similar_day_in_previous_week(d, holiday_calendar):
        """
        Retrieves the similar day for a given date. If the given date is not an holiday, the similar day is the
        closest day of the previous year in terms of calendar position which shares the weekday. If such a date is an holiday,
        the same weekday of the week before is considered.
        If the given date is an holiday, its similar day is the closest holiday to the given date in the previous year.
        :param d: a date
        :param holiday_calendar: a calendar from holidays package
        :return: the similar day
        """
        if not d or pd.isna(d):
            return None

        new_date = d - relativedelta(weeks=1)
        holiday = holiday_calendar.get(d)
        diff = d.weekday() - new_date.weekday() if d.weekday() >= new_date.weekday() \
            else d.weekday() - new_date.weekday() + 7

        if not holiday:
            new_date = new_date + datetime.timedelta(days=diff)
            while holiday_calendar.get(new_date):
                new_date = new_date - datetime.timedelta(days=7)
        # elif holiday == 'Pasqua di Resurrezione':
        #     new_date = dateutil.easter.easter(new_date.year)
        # elif holiday == "Lunedì dell'Angelo":
        #     new_date = dateutil.easter.easter(new_date.year) + datetime.timedelta(days=1)

        return new_date

class StandardConsumption:
    def get_standard_consumption_as_mean(df, id, date_var, var, country):
        """
        Retrieves the standard consumption for a given date as hourly monthly mean differentiated by holiday, weekend, weekdays.
        :params: dataframe and date_var as string, var as string, country as string
        :return: the similar day
        """

        df = Regressors.add_holidays_by_country(df, date_var, country)
        df = Regressors.add_weekdays(df, date_var)
        df.loc[:, 'day'] = df.loc[:, date_var].dt.day
        df.loc[:, 'hour'] = df.loc[:, date_var].dt.hour
        df.loc[:, 'month'] = df.loc[:, date_var].dt.month

        timedelta = Utils.delta_format(abs(np.diff(df[date_var])).mean())
        freq = Utils.find_freq(timedelta)

        if freq == 'D':
            freq_var='day'
        else:
            freq_var='hour'

        # Compute standard consumption as means
        mask = (~df[var].isnull()) & ((df.wd_mon==1) | (df.wd_tue==1) | (df.wd_wed==1) | (df.wd_thu==1) | (df.wd_fri==1)) & (df.holidays==0)
        df_mean_weekdays = pd.pivot_table(df.loc[mask==True, ], index=[id, 'month', freq_var], values=var, aggfunc=np.mean).reset_index()
        new_var = var + '_std_weekdays'
        df_mean_weekdays.rename(columns={var: new_var}, inplace=True)
        df_mean_weekdays.loc[df_mean_weekdays[new_var]<0, new_var] = 0

        mask = (~df[var].isnull()) & ((df.wd_sat==1) | (df.wd_sun==1)) & (df.holidays==0)
        df_mean_weekend = pd.pivot_table(df.loc[mask==True, ], index=[id, 'month', freq_var], values=var, aggfunc=np.mean).reset_index()
        new_var = var + '_std_weekend'
        df_mean_weekend.rename(columns={var: new_var}, inplace=True)
        df_mean_weekend.loc[df_mean_weekend[new_var]<0, new_var] = 0

        mask = (~df[var].isnull()) & (df.holidays==1)
        df_mean_holidays = pd.pivot_table(df.loc[mask==True, ], index=[id, 'month', freq_var], values=var, aggfunc=np.mean).reset_index()
        new_var = var + '_std_holidays'
        df_mean_holidays.rename(columns={var: new_var}, inplace=True)
        df_mean_holidays.loc[df_mean_holidays[new_var]<0, new_var] = 0

        # Merging
        dfs = [df_mean_holidays, df_mean_weekdays, df_mean_weekend]
        df_mean = reduce(lambda left,right: pd.merge(left,right,how='outer', on=[id, 'month', freq_var], validate='1:1'), dfs)
        df = pd.merge(df, df_mean, how='left', on=[id, 'month', freq_var], validate='m:1')

        return df


    def get_minimum_consumption(df, date_var, var, country):
        """
        Retrieves the minimum consumption for a given date as hourly monthly minimum value differentiated by holiday, weekend, night.
        :params: dataframe and date_var as string, var as string, country as string
        :return: the similar day
        """

        df = Regressors.add_holidays_by_country(df, date_var, country)
        df = Regressors.add_weekdays(df, date_var)
        df.loc[:, 'day'] = df.loc[:, date_var].dt.day
        df.loc[:, 'hour'] = df.loc[:, date_var].dt.hour
        df.loc[:, 'month'] = df.loc[:, date_var].dt.month

        timedelta = Utils.delta_format(abs(np.diff(df[date_var])).mean())
        freq = Utils.find_freq(timedelta)

        if freq == 'D':
            freq_var='day'
        else:
            freq_var='hour'

        # Compute min consumption
        mask = (~df[var].isnull()) & (df.holidays==0) & ((df.wd_mon==1) | (df.wd_tue==1) | (df.wd_wed==1) | (df.wd_thu==1) | (df.wd_fri==1))
        df_min_weekdays = pd.pivot_table(df.loc[mask==True, ], index=[id, 'month', freq_var], values=var, aggfunc=np.min).reset_index()
        new_var = var + '_min_weekdays'
        df_min_weekdays.rename(columns={var: new_var}, inplace=True)
        df_min_weekdays.loc[df_min_weekdays[new_var]<0, new_var] = 0

        mask = (~df[var].isnull()) & ((df.wd_sat==1) | (df.wd_sun==1)) & (df.holidays==0)
        df_min_weekend = pd.pivot_table(df.loc[mask==True, ], index=[id, 'month', freq_var], values=var, aggfunc=np.min).reset_index()
        new_var = var + '_min_weekend'
        df_min_weekend.rename(columns={var: new_var}, inplace=True)
        df_min_weekend.loc[df_min_weekend[new_var]<0, new_var] = 0

        mask = (~df[var].isnull()) & (df.holidays==1)
        df_min_holidays = pd.pivot_table(df.loc[mask==True, ], index=[id, 'month', freq_var], values=var, aggfunc=np.min).reset_index()
        new_var = var + '_min_holidays'
        df_min_holidays.rename(columns={var: new_var}, inplace=True)
        df_min_holidays.loc[df_min_holidays[new_var]<0, new_var] = 0

        # Merging
        dfs = [df_min_holidays, df_min_weekdays, df_min_weekend]
        df_min = reduce(lambda left,right: pd.merge(left,right,how='outer', on=[id, 'month', freq_var], validate='1:1'), dfs)
        df = pd.merge(df, df_min, how='left', on=[id, 'month', freq_var], validate='m:1')

        return df
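A short usage sketch for the new SimilarDay helpers; the dates and the 'italy' country string are illustrative (the helper capitalises the string and resolves it against the holidays package):

    import pandas as pd
    from Code.Regressors.similar_day import SimilarDay

    # Hypothetical input dates; 'italy' is resolved to holidays.Italy() inside the helper.
    dates = pd.Series(pd.to_datetime(['2021-04-05', '2021-06-15', '2021-12-25']))

    # Comparable day one year earlier for each date, following the rules in the docstring above.
    similar = SimilarDay.get_similar_days_in_previous_year(dates, 'italy')
    print(similar)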

Diff for: Code/Regressors/temperatures.py

+84 (new file)

@@ -0,0 +1,84 @@
# selenium for web driving
from logging import raiseExceptions
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver import ActionChains
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.chrome.options import Options

# time for pausing between navigation
import time
import glob
import shutil

# datetime functions
import datetime as dt

# file management functions
import os
import configparser
import ctypes

# data elaboration functions
import pandas as pd
import numpy as np
from openpyxl import load_workbook
from functools import reduce

# custom functions
from Code.Utils.utils import Utils, AlphabeticalCombinations

class Temperatures:

    def ten_year(df, id, date_var, freq, temperature_list, start_date, end_date):
        """
        Computes ten year temperatures and asis temperatures
        :params: dataframe
        :return: a Pandas dataframe, a .pkl file and a .xlsx file
        """
        ten_year_list = []
        ten_year_overall_list = []
        for t in temperature_list:
            ten_year_list = ten_year_list + [t + '_ten_year']
            ten_year_overall_list = ten_year_overall_list + [t + '_ten_year_overall']

        df_seq = Utils.add_seq(df, date_var = date_var, serie=id, freq = freq, start_date=start_date, end_date=end_date)
        df_seq.loc[:, 'months_days'] = df_seq.loc[:, date_var].dt.strftime('%m/%d')

        # Defining averages by id
        df_to_merge = pd.pivot_table(df_seq, values=temperature_list, index=[id, 'months_days'], aggfunc=np.mean).reset_index()
        col_list = [id, 'months_days'] + ten_year_list
        df_to_merge.columns = col_list

        # Defining overall averages
        df_to_merge_overall = pd.pivot_table(df_seq, values=temperature_list, index=['months_days'], aggfunc=np.mean).reset_index()
        col_list_overall = ['months_days'] + ten_year_overall_list
        df_to_merge_overall.columns = col_list_overall

        # Merging
        df_merge = pd.merge(df_seq, df_to_merge, on=[id, 'months_days'], how='left', validate='m:1')
        df_merge_overall = pd.merge(df_merge, df_to_merge_overall, on=['months_days'], how='left', validate='m:1')

        ### Creating As-Is temperatures: where available use actual temp, if not use ten year
        for t in temperature_list:
            asis_name = t + '_asis'
            ten_year_name = t + '_ten_year'
            ten_year_overall_name = t + '_ten_year_overall'
            df_merge_overall.loc[:, asis_name] = df_merge_overall.loc[:, t]
            df_merge_overall.loc[df_merge_overall[asis_name].isnull(), asis_name] = df_merge_overall.loc[:, ten_year_name]
            df_merge_overall.loc[df_merge_overall[asis_name].isnull(), asis_name] = df_merge_overall.loc[:, ten_year_overall_name]

            if (any(df_merge_overall[asis_name].isnull())):
                print('ten_year: asis temperatures still CONTAIN nan value: removing')
                df_merge_overall = df_merge_overall.loc[df_merge_overall[asis_name].isnull()==False, ]
            else:
                print('ten_year: asis temperatures do NOT contain any nan value')

        df_ten_year = df_merge_overall.loc[:, ['site_id', 'timestamp', 'temperature', 'distance', 'months_days',
                                               'temperature_ten_year', 'temperature_asis']]

        return df_ten_year
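A hedged sketch of how Temperatures.ten_year might be called. The column names follow those selected at the end of the function ('site_id', 'timestamp', 'temperature', 'distance'); the data, frequency, and date range are illustrative assumptions, and Utils.add_seq's exact expectations are not shown in this diff:

    import pandas as pd
    from Code.Regressors.temperatures import Temperatures

    # Hypothetical two-year daily history for a single site.
    weather = pd.DataFrame({
        'site_id': 'A',
        'timestamp': pd.date_range('2019-01-01', '2020-12-31', freq='D'),
        'temperature': 15.0,
        'distance': 0.0,
    })

    # Calendar-day means across years plus as-is temperatures that fall back to
    # those means wherever the actual reading is missing.
    df_ten_year = Temperatures.ten_year(
        weather, id='site_id', date_var='timestamp', freq='D',
        temperature_list=['temperature'],
        start_date='2019-01-01', end_date='2020-12-31')
    print(df_ten_year.head())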

Diff for: Code/Scoring/__pycache__/forecast.cpython-37.pyc

-583 Bytes
Binary file not shown.

Diff for: Code/Scoring/__pycache__/kpi.cpython-37.pyc

2.96 KB
Binary file not shown.

Diff for: Code/Scoring/__pycache__/scoring.cpython-37.pyc

37 Bytes
Binary file not shown.

Diff for: Code/Scoring/__pycache__/train.cpython-37.pyc

-559 Bytes
Binary file not shown.

Diff for: Code/Scoring/__pycache__/train_test.cpython-37.pyc

534 Bytes
Binary file not shown.
