microsoft
diff --git a/Diff for: ‎Code/Plotting/__pycache__/plots.cpython-37.pyc
321 Bytes b/Diff for: ‎Code/Plotting/__pycache__/plots.cpython-37.pyc
321 Bytes
diff --git a/Diff for: ‎Code/Plotting/plots.py
+38-9 b/Diff for: ‎Code/Plotting/plots.py
+38-9
diff --git a/Diff for: ‎Code/Regressors/__pycache__/regressors.cpython-37.pyc
0 Bytes b/Diff for: ‎Code/Regressors/__pycache__/regressors.cpython-37.pyc
0 Bytes
diff --git a/Diff for: ‎Code/Regressors/regressors.py
+8-4 b/Diff for: ‎Code/Regressors/regressors.py
+8-4
diff --git a/Diff for: ‎Code/Regressors/similar_day.py
+192 b/Diff for: ‎Code/Regressors/similar_day.py
+192
diff --git a/Diff for: ‎Code/Regressors/temperatures.py
+84 b/Diff for: ‎Code/Regressors/temperatures.py
+84
diff --git a/Diff for: ‎Code/Scoring/__pycache__/forecast.cpython-37.pyc
-583 Bytes b/Diff for: ‎Code/Scoring/__pycache__/forecast.cpython-37.pyc
-583 Bytes
diff --git a/Diff for: ‎Code/Scoring/__pycache__/kpi.cpython-37.pyc
2.96 KB b/Diff for: ‎Code/Scoring/__pycache__/kpi.cpython-37.pyc
2.96 KB
diff --git a/Diff for: ‎Code/Scoring/__pycache__/scoring.cpython-37.pyc
37 Bytes b/Diff for: ‎Code/Scoring/__pycache__/scoring.cpython-37.pyc
37 Bytes
diff --git a/Diff for: ‎Code/Scoring/__pycache__/train.cpython-37.pyc
-559 Bytes b/Diff for: ‎Code/Scoring/__pycache__/train.cpython-37.pyc
-559 Bytes
diff --git a/Diff for: ‎Code/Scoring/__pycache__/train_test.cpython-37.pyc
534 Bytes b/Diff for: ‎Code/Scoring/__pycache__/train_test.cpython-37.pyc
534 Bytes
@@ -14,8 +14,6 @@
 from matplotlib import pyplot as plt
 import matplotlib.dates as mdates
 import plotly.graph_objects as go
-from plotly.subplots import make_subplots
-from adjustText import adjust_text
 
 # custom functions
 from Configuration.config import cfg_path
@@ -140,17 +138,14 @@ def sliding_fcst_plot(df, predict_col, expected_values, chart_title="", kpi=True
 
         # Add annotations
         for col in ['train_start_date', 'train_end_date', 'test_start_date', 'test_end_date']: 
-            if col in list(df.columns):
+            if col in list(df.columns) and col in ['train_end_date', 'test_end_date']:
                 col_date = pd.to_datetime(str(df[col].unique()[0])).strftime('%Y-%m-%d')
-                date_value = df[col].unique()[0]
-                unique_index = pd.Index(list(df[date].unique()))
-                closest_date = df.loc[unique_index.get_loc(date_value,method='nearest'), date]
+                closest_date = df[col].unique()[0]
                 x_value = pd.to_datetime(df.loc[df[date]==closest_date, date].reset_index(drop=True)[0], format='%Y-%m-%d') 
                 y_value = pd.to_numeric(df.loc[df[date]==closest_date, y].reset_index(drop=True)[0])
                 fig.add_annotation(
                 x=x_value, 
-                y=y_value,
-                #textangle=45,
+                y=y_value,  
                 text= col + ': ' +  str(col_date),
                 showarrow=True,
                 arrowhead=1, 
@@ -159,7 +154,41 @@ def sliding_fcst_plot(df, predict_col, expected_values, chart_title="", kpi=True
                 font = dict(
                 color="black",
                 size=16
-            ))
+                ))            
+            elif col in list(df.columns) and col in ['train_start_date']:
+                col_date = pd.to_datetime(str(df[col].unique()[0])).strftime('%Y-%m-%d')
+                closest_date  = df[col].unique()[0]
+                x_value = pd.to_datetime(df.loc[df[date]==closest_date, date].reset_index(drop=True)[0], format='%Y-%m-%d') 
+                y_value = pd.to_numeric(df.loc[df[date]==closest_date, y].reset_index(drop=True)[0])
+                fig.add_annotation(
+                x=x_value, 
+                y=y_value*2,  
+                text= col + ': ' +  str(col_date),
+                showarrow=True,
+                arrowhead=1, 
+                arrowsize=1,
+                arrowwidth=2,
+                font = dict(
+                color="black",
+                size=16
+                ))
+            elif col in list(df.columns) and col in ['test_start_date']:
+                col_date = pd.to_datetime(str(df[col].unique()[0])).strftime('%Y-%m-%d')
+                closest_date = df[col].unique()[0]
+                x_value = pd.to_datetime(df.loc[df[date]==closest_date, date].reset_index(drop=True)[0], format='%Y-%m-%d') 
+                y_value = pd.to_numeric(df.loc[df[date]==closest_date, y].reset_index(drop=True)[0])
+                fig.add_annotation(
+                x=x_value, 
+                y=y_value*1.5,  
+                text= col + ': ' +  str(col_date),
+                showarrow=True,
+                arrowhead=1, 
+                arrowsize=1,
+                arrowwidth=2,
+                font = dict(
+                color="black",
+                size=16
+                ))
             else:
                 print('No annotation available for', col)
 
 
@@ -92,7 +92,8 @@ def add_months(df, date_var):
         return df 
 
     def calculate_degree_days(df, base_temperature, temperature):
-        """Calculate the Degree Days Heating and Cooling values
+        """
+        Calculate the Degree Days Heating and Cooling values
         :params: dataframe, base temperature to start and actual temperature as string
         :return: a pandas dataframe
         """
@@ -102,7 +103,8 @@ def calculate_degree_days(df, base_temperature, temperature):
         return df
 
     def merge_holidays_by_date(df, df_holidays, id):
-        """Merge Holiday df with the train df
+        """
+        Merge Holiday df with the train df
         :params: df as dataframe, df_holidays as df containing info on holidays, id as string
         :return: a pandas dataframe
         """
@@ -124,7 +126,8 @@ def merge_holidays_by_date(df, df_holidays, id):
         return df
 
     def merge_additional_days_off(df, df_metadata, id, dict_days_off):
-        """Merge Site Weekend data with train df
+        """
+        Merge Site Weekend data with train df
         :params: df as dataframe, df_metadata as df containing additional info, id as string, dict_days_off as dictionary 
         :return: a pandas dataframe
         """
@@ -142,7 +145,8 @@ def merge_additional_days_off(df, df_metadata, id, dict_days_off):
         return df
 
     def merge_weather(df, weather, date_var, id):
-        """Merge weather data into the train df
+        """
+        Merge weather data into the train df
         :params: df as dataframe, weather as dataframe with weather info, date_var as string, id as string
         :return: a pandas dataframe
         
 
@@ -0,0 +1,192 @@
+# data elaboration functions
+import numpy as np
+import pandas as pd
+import holidays as h
+from functools import reduce
+
+# datetime functions
+import dateutil
+import datetime
+from dateutil.relativedelta import relativedelta
+
+# custom functions
+from Code.Regressors.regressors import Regressors
+from Code.Utils.utils import AlphabeticalCombinations, Utils
+
+class SimilarDay:
+    """Finds, for a given date, a comparable reference date one year or one week earlier."""
+
+    def _apply_to_dates(dates, country, finder):
+        """
+        Applies finder to each date, using the holiday calendar of the given country.
+        :params: dates as list-like object of dates, country as string, finder as function (date, calendar)
+        :return: a Pandas series of similar days
+        """
+        d = pd.to_datetime(pd.Series(dates))
+        # The calendar class is looked up by attribute rather than through eval(), so that a
+        # crafted country string cannot run arbitrary code. Unknown names raise AttributeError.
+        holidays = getattr(h, country.capitalize())()
+        return d.apply(lambda x: finder(x, holidays))
+
+    def _skip_holidays_backwards(new_date, holiday_calendar):
+        """
+        Moves new_date back one week at a time while it is an holiday.
+        :params: new_date as date, holiday_calendar as calendar from holidays package
+        :return: the first date found which is not an holiday
+        """
+        while holiday_calendar.get(new_date):
+            new_date = new_date - datetime.timedelta(days=7)
+        return new_date
+
+    def get_similar_days_in_previous_year(dates, country):
+        """
+        Retrieves the similar day in the previous year for each given date.
+        :param dates: a list-like object of dates
+        :param country: country as string, capitalized to get the calendar class of the holidays package
+        :return: a Pandas series of similar days
+        """
+        return SimilarDay._apply_to_dates(dates, country, SimilarDay.get_similar_day_in_previous_year)
+
+    def get_similar_days_in_previous_week(dates, country):
+        """
+        Retrieves the similar day in the previous week for each given date.
+        :param dates: a list-like object of dates
+        :param country: country as string, capitalized to get the calendar class of the holidays package
+        :return: a Pandas series of similar days
+        """
+        return SimilarDay._apply_to_dates(dates, country, SimilarDay.get_similar_day_in_previous_week)
+
+    def get_similar_day_in_previous_year(d, holiday_calendar):
+        """
+        Retrieves the similar day in the previous year for a given date.
+        If the given date is not an holiday, the similar day is the first date, on or after the date one year
+        earlier, which shares the weekday. While such a date is an holiday, the same weekday of the week
+        before is considered.
+        If the given date is an holiday, the date one year earlier is returned as it is.
+        NOTE: holidays with a moving date (e.g. Easter) are therefore not mapped to the same holiday.
+        :param d: a date
+        :param holiday_calendar: a calendar from holidays package
+        :return: the similar day, None if d is missing
+        """
+        if not d or pd.isna(d):
+            return None
+
+        new_date = d - relativedelta(years=1)
+        if holiday_calendar.get(d):
+            return new_date
+
+        # Number of days (0 to 6) to move forward in order to land on the weekday of d
+        diff = (d.weekday() - new_date.weekday()) % 7
+        new_date = new_date + datetime.timedelta(days=diff)
+        return SimilarDay._skip_holidays_backwards(new_date, holiday_calendar)
+
+    def get_similar_day_in_previous_week(d, holiday_calendar):
+        """
+        Retrieves the similar day in the previous week for a given date.
+        If the given date is not an holiday, the similar day is the same weekday of the week before. While
+        such a date is an holiday, the same weekday of one more week before is considered.
+        If the given date is an holiday, the date one week earlier is returned as it is.
+        :param d: a date
+        :param holiday_calendar: a calendar from holidays package
+        :return: the similar day, None if d is missing
+        """
+        if not d or pd.isna(d):
+            return None
+
+        new_date = d - relativedelta(weeks=1)
+        if holiday_calendar.get(d):
+            return new_date
+
+        # d and new_date are exactly one week apart: they already share the weekday
+        return SimilarDay._skip_holidays_backwards(new_date, holiday_calendar)
+
+class StandardConsumption:
+    """Reference consumption values by month and hour (or day), differentiated by type of day."""
+
+    def _aggregate_by_day_type(df, id, date_var, var, country, aggfunc, label):
+        """
+        Aggregates var by id, month and hour (day of month for daily data) separately on holidays,
+        weekdays and weekends, then merges the three resulting columns into df.
+        :params: dataframe, id as string, date_var as string, var as string, country as string,
+                 aggfunc as pandas aggregation name, label as string used to name the new columns
+        :return: a pandas dataframe
+        """
+        df = Regressors.add_holidays_by_country(df, date_var, country)
+        df = Regressors.add_weekdays(df, date_var)
+        df.loc[:, 'day'] = df.loc[:, date_var].dt.day
+        df.loc[:, 'hour'] = df.loc[:, date_var].dt.hour
+        df.loc[:, 'month'] = df.loc[:, date_var].dt.month
+
+        # Daily data is aggregated by day of month, any other frequency by hour of day
+        timedelta = Utils.delta_format(abs(np.diff(df[date_var])).mean())
+        freq_var = 'day' if Utils.find_freq(timedelta) == 'D' else 'hour'
+        keys = [id, 'month', freq_var]
+
+        available = ~df[var].isnull()
+        no_holiday = df.holidays==0
+        working_days = (df.wd_mon==1) | (df.wd_tue==1) | (df.wd_wed==1) | (df.wd_thu==1) | (df.wd_fri==1)
+        weekend_days = (df.wd_sat==1) | (df.wd_sun==1)
+
+        # The order of this list defines the order of the columns added to df
+        masks = [
+            ('holidays', available & (df.holidays==1)),
+            ('weekdays', available & working_days & no_holiday),
+            ('weekend', available & weekend_days & no_holiday),
+        ]
+
+        dfs = []
+        for day_type, mask in masks:
+            new_var = var + '_' + label + '_' + day_type
+            df_agg = pd.pivot_table(df.loc[mask], index=keys, values=var, aggfunc=aggfunc).reset_index()
+            df_agg = df_agg.rename(columns={var: new_var})
+            # Negative values are replaced with zero
+            df_agg.loc[df_agg[new_var]<0, new_var] = 0
+            dfs.append(df_agg)
+
+        # Merging
+        df_all = reduce(lambda left,right: pd.merge(left,right,how='outer', on=keys, validate='1:1'), dfs)
+        return pd.merge(df, df_all, how='left', on=keys, validate='m:1')
+
+    def get_standard_consumption_as_mean(df, id, date_var, var, country):
+        """
+        Retrieves the standard consumption as mean by id, month and hour (day of month for daily data),
+        differentiated by holiday, weekend, weekdays.
+        :params: dataframe, id as string, date_var as string, var as string, country as string
+        :return: a pandas dataframe with the columns var + '_std_holidays', var + '_std_weekdays'
+                 and var + '_std_weekend' added
+        """
+        return StandardConsumption._aggregate_by_day_type(df, id, date_var, var, country, 'mean', 'std')
+
+    def get_minimum_consumption(df, date_var, var, country, id=None):
+        """
+        Retrieves the minimum consumption as minimum value by id, month and hour (day of month for daily data),
+        differentiated by holiday, weekend, weekdays.
+        :params: dataframe, date_var as string, var as string, country as string, id as string
+        :return: a pandas dataframe with the columns var + '_min_holidays', var + '_min_weekdays'
+                 and var + '_min_weekend' added
+        :raises ValueError: if id is not given
+        """
+        # id was previously not a parameter, so the builtin function id() ended up among the
+        # grouping and merge keys. It is accepted as a trailing argument to keep existing calls
+        # valid, but it is needed to group the data.
+        if id is None:
+            raise ValueError('get_minimum_consumption: id (name of the column identifying the series) is required')
+        return StandardConsumption._aggregate_by_day_type(df, id, date_var, var, country, 'min', 'min')
+
+
@@ -0,0 +1,84 @@
+# selenium for web driving
+from logging import raiseExceptions
+from selenium import webdriver
+from selenium.webdriver.common.by import By
+from selenium.webdriver.support.ui import WebDriverWait
+from selenium.webdriver.support import expected_conditions as EC
+from selenium.webdriver import ActionChains
+from selenium.webdriver.common.keys import Keys
+from selenium.webdriver.chrome.options import Options
+
+# time for pausing between navigation
+import time
+import glob
+import shutil
+
+# datetime functions
+import datetime as dt
+
+# file management functions
+import os
+import configparser
+import ctypes
+
+# data elaboration functions
+import pandas as pd
+import numpy as np
+from openpyxl import load_workbook
+from functools import reduce
+
+# custom functions
+from Code.Utils.utils import Utils, AlphabeticalCombinations
+
+class Temperatures:
+    """Reference temperatures by calendar day and temperatures with missing values filled."""
+
+    def ten_year(df, id, date_var, freq, temperature_list, start_date, end_date):
+        """
+        Computes ten year temperatures and asis temperatures.
+        For each column in temperature_list, the ten year temperature is the mean by id and calendar day
+        (month/day) of the data returned by Utils.add_seq. The asis temperature is the actual temperature
+        where available, otherwise the ten year one, otherwise the mean by calendar day over all the ids.
+        Rows where the asis temperature is still missing are removed.
+        :params: dataframe, id as string, date_var as string, temperature_list as list of strings,
+                 freq, start_date and end_date as passed to Utils.add_seq
+        :return: a Pandas dataframe with id, date_var, the temperatures, 'distance' (only if available),
+                 'months_days', the '_ten_year' and the '_asis' columns
+        """
+        ten_year_names = {t: t + '_ten_year' for t in temperature_list}
+        ten_year_overall_names = {t: t + '_ten_year_overall' for t in temperature_list}
+
+        df_seq = Utils.add_seq(df, date_var = date_var, serie=id, freq = freq, start_date=start_date, end_date=end_date)
+        df_seq.loc[:, 'months_days'] = df_seq.loc[:, date_var].dt.strftime('%m/%d')
+
+        # Defining averages by id
+        # Columns are renamed by name and not by position: pivot_table returns the value columns
+        # sorted, so positional names would be swapped when temperature_list is not sorted.
+        df_to_merge = pd.pivot_table(df_seq, values=temperature_list, index=[id, 'months_days'], aggfunc='mean').reset_index()
+        df_to_merge = df_to_merge.rename(columns=ten_year_names)
+
+        # Defining overall averages
+        df_to_merge_overall = pd.pivot_table(df_seq, values=temperature_list, index=['months_days'], aggfunc='mean').reset_index()
+        df_to_merge_overall = df_to_merge_overall.rename(columns=ten_year_overall_names)
+
+        # Merging
+        df_merge = pd.merge(df_seq, df_to_merge, on=[id, 'months_days'], how='left', validate='m:1')
+        df_merge_overall = pd.merge(df_merge, df_to_merge_overall, on=['months_days'], how='left', validate='m:1')
+
+        ### Creating As-Is temperatures: where available use actual temp, if not use ten year
+        asis_names = []
+        for t in temperature_list:
+            asis_name = t + '_asis'
+            asis_names.append(asis_name)
+            df_merge_overall.loc[:, asis_name] = (
+                df_merge_overall[t]
+                .fillna(df_merge_overall[ten_year_names[t]])
+                .fillna(df_merge_overall[ten_year_overall_names[t]])
+            )
+
+            if df_merge_overall[asis_name].isnull().any():
+                print('ten_year: asis temperatures still CONTAIN nan value: removing')
+                df_merge_overall = df_merge_overall.loc[df_merge_overall[asis_name].notnull()]
+            else:
+                print('ten_year: asis temperatures do NOT contain any nan value')
+
+        # Output columns follow the arguments instead of fixed names ('site_id', 'timestamp', 'temperature').
+        # 'distance' is not computed here: it is kept only when the data provides it.
+        extra_cols = [c for c in ['distance'] if c in df_merge_overall.columns]
+        output_cols = ([id, date_var] + list(temperature_list) + extra_cols + ['months_days']
+                       + [ten_year_names[t] for t in temperature_list] + asis_names)
+        df_ten_year = df_merge_overall.loc[:, output_cols]
+
+        return df_ten_year
+    
+