Skip to content

Commit b9c7f0e

Browse files
committed
read me update and insurance claims
1 parent 1cb2b65 commit b9c7f0e

14 files changed

+2847
-179
lines changed
75 Bytes
Binary file not shown.

Code/Plotting/plots.py

+9-2
Original file line number | Diff line number | Diff line change
@@ -32,6 +32,9 @@ def sliding_line_plot(df, serie_to_plot, id, i, chart_title=""):
3232
### Setup
3333
date = Utils.find_date(df)
3434

35+
## Sort
36+
df.sort_values(date, inplace=True)
37+
3538
## Create figure
3639
fig = go.Figure()
3740
fig.add_trace(go.Scatter(x=list(df.loc[df[id] == i, date]), y=list(df.loc[df[id] == i, serie_to_plot]), name=str(i)))
@@ -96,11 +99,15 @@ def sliding_fcst_plot(df, predict_col, expected_values, chart_title="", kpi=True
9699

97100
### Setup
98101
date = Utils.find_date(df)
102+
99103
if isinstance(date, list):
100104
date = list(set(Utils.find_date(df)) - set(['train_start_date', 'train_end_date', 'test_start_date', 'test_end_date']))[0]
101105

102-
y = predict_col
103-
fcst = expected_values
106+
y = predict_col.copy()
107+
fcst = expected_values.copy()
108+
109+
## Sort
110+
df = df.sort_values(date).copy()
104111

105112
## Adding model info to chart title
106113
if 'best_model' in list(df.columns):
Binary file not shown.

Code/Profiling/Intermittent/intermittent.py

+15-13
Original file line number | Diff line number | Diff line change
@@ -63,6 +63,7 @@ def compute_indicator_values(vect, threshold, perc, quant, highest, lowest):
6363

6464
### Removing nan
6565
vect = vect[vect!=np.nan]
66+
vect = vect.astype(np.float)
6667

6768
### Create low demand list names
6869
list_low_demand = ["zero", "perc_threshold"]
@@ -82,7 +83,7 @@ def compute_indicator_values(vect, threshold, perc, quant, highest, lowest):
8283
nzd = vect[vect>low_demand]
8384
k = len(nzd)
8485

85-
if sum(vect[vect>low_demand])>=2:
86+
if (sum(vect[vect>low_demand])>=2) & (k>1):
8687
x = np.append([nzd[0]], [nzd[1:k] - nzd[0:(k-1)]])
8788

8889
cv2 = Intermittent.cv2(nzd, highest, lowest)
@@ -97,7 +98,7 @@ def compute_indicator_values(vect, threshold, perc, quant, highest, lowest):
9798

9899
return res
99100

100-
def enh_compute_indicator_values(vect, threshold, perc, quant, highest, lowest):
101+
def enhanced_compute_indicator_values(vect, threshold, perc, quant, highest, lowest):
101102
''' Computes indicator values (enhanced)
102103
:params: vect as numpy array, threshold as numeric, perc as numeric, quant as numeric, highest and lowest as scalars 0<=x<=1 as winsorization percentages
103104
:return: a dictionary
@@ -113,8 +114,9 @@ def enh_compute_indicator_values(vect, threshold, perc, quant, highest, lowest):
113114
vect = vect[1:len(vect)]
114115
print('Threshold:', threshold)
115116

116-
### Removing nan
117-
vect = vect[vect!=np.nan]
117+
### Removing nan and selecting float
118+
vect = vect[(vect!=np.nan)]
119+
vect = vect.astype(np.float)
118120

119121
### Z function
120122
def Z(quant):
@@ -136,7 +138,7 @@ def Z(quant):
136138
nzd = vect[vect>low_demand]
137139
k = len(nzd)
138140

139-
if sum(vect[vect>low_demand])>=2:
141+
if (sum(vect[vect>low_demand])>=2) & (k>1):
140142
x = np.array([nzd[0]]) + [nzd[1:k] - nzd[0:(k-1)]] + np.array([len(vect)+1-nzd[k-1]])
141143

142144
cv2 = Intermittent.cv2(nzd, highest, lowest)
@@ -196,18 +198,18 @@ def classify_intermittent(df, type, thres_cv2_constant, thres_cv2, thres_adi, th
196198
except:
197199
print('classify_intermittent: no constant ids')
198200

199-
# Intermittent
200-
mask_intermittent = (score_no_nan.type == type) &\
201+
# Spikes
202+
mask_spikes = (score_no_nan.type == type) &\
201203
(score_no_nan.k > min_time_cons) &\
202204
(score_no_nan.cv2 < thres_cv2) &\
203205
(score_no_nan.cv2 >= thres_adi) &\
204206
(score_no_nan.cv2 < thres_sddi)
205-
df_intermittent = score_no_nan.loc[mask_intermittent, ]
207+
df_spikes = score_no_nan.loc[mask_spikes, ]
206208
try:
207-
df_intermittent.loc[:, 'profile'] = 'intermittent'
208-
print('classify_intermittent: intermittent ids', len(df_intermittent))
209+
df_spikes.loc[:, 'profile'] = 'spikes'
210+
print('classify_intermittent: spikes ids', len(df_spikes))
209211
except:
210-
print('classify_intermittent: no intermittent ids')
212+
print('classify_intermittent: no spikes ids')
211213

212214
# Lumpy
213215
mask_lumpy = (score_no_nan.type == type) &\
@@ -260,13 +262,13 @@ def classify_intermittent(df, type, thres_cv2_constant, thres_cv2, thres_adi, th
260262
print('classify_intermittent: no unforecastable_quantity ids')
261263

262264
# df_profiling
263-
df_profiling = pd.concat([df_regular, df_constant_zero, df_constant, df_intermittent, df_lumpy, df_erratic, df_unforecastable_time, df_unforecastable_quantity], axis=0)
265+
df_profiling = pd.concat([df_regular, df_constant_zero, df_constant, df_spikes, df_lumpy, df_erratic, df_unforecastable_time, df_unforecastable_quantity], axis=0)
264266

265267
return df_profiling
266268

267269
def call_intermittent_function(func, *args):
268270
from Code.Profiling.Intermittent.intermittent import Intermittent
269-
func_dict = {'enh_compute_indicator_values': Intermittent.enh_compute_indicator_values, 'compute_indicator_values': Intermittent.compute_indicator_values}
271+
func_dict = {'enhanced_compute_indicator_values': Intermittent.enhanced_compute_indicator_values, 'compute_indicator_values': Intermittent.compute_indicator_values}
270272
result = func_dict.get(func)(*args)
271273
return result
272274

Docs/Images/intermittent_TS.png

-98.8 KB
Loading

Docs/Images/panel_data.png

59.3 KB
Loading

Docs/Images/sliding_plot.png

69.3 KB
Loading
1.65 MB
Binary file not shown.
File renamed without changes.
File renamed without changes.

0 commit comments

Comments
 (0)