utils.py
import os
import random

import numpy as np
import pandas as pd
from sklearn.metrics import (roc_auc_score, accuracy_score, f1_score,
                             auc, precision_recall_curve)


def shingle(series, dim):
    """Shingle a one-dimensional series into a (dim, len(series) - dim + 1)
    matrix of overlapping windows."""
    height = len(series) - dim + 1
    shingled = np.zeros((dim, height))
    for i in range(dim):
        # Row i holds the series shifted forward by i positions.
        shingled[i] = series[i:i + height]
    return shingled
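
# A minimal usage sketch (an added illustration, not part of the original
# file): shingling a toy 1-D series with dim=3. The input array below is
# purely illustrative.
#
#     >>> shingle(np.arange(6), 3)
#     array([[0., 1., 2., 3.],
#            [1., 2., 3., 4.],
#            [2., 3., 4., 5.]])
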
def get_result(args, y_test, recon_err_test, n_lin=200, decimal=4, verbose=False):
    """Compute anomaly-detection metrics from reconstruction errors."""
    # Sweep n_lin candidate thresholds over the range of the test errors and
    # keep the one that maximizes the F1 score.
    threshold = np.linspace(min(recon_err_test), max(recon_err_test), n_lin)
    acc_list = []
    f1_list = []
    auc_list = []
    for t in threshold:
        y_pred = (recon_err_test > t).astype(int)
        acc_list.append(accuracy_score(y_test, y_pred))
        f1_list.append(f1_score(y_test, y_pred))
        auc_list.append(roc_auc_score(y_test, y_pred))
    i = np.argmax(f1_list)
    t = threshold[i]
    score = f1_list[i]
    print('Recommended threshold: %.3f, related f1 score: %.3f' % (t, score))
    # t = sorted(recon_err_test, reverse=True)[int((sum(y_test)/len(y_test))*len(y_test))]  # set the threshold from the known anomaly rate
    # print('Recommended threshold: %.3f' % (t))
    y_pred = (recon_err_test > t).astype(int)
    print('\nTest set : AUC_ROC: {:.3f} F1: {:.3f} Accuracy: {:.3f}'.format(
        roc_auc_score(y_test, recon_err_test),
        f1_score(y_test, y_pred),
        accuracy_score(y_test, y_pred)))
    # Confusion-matrix counts at the selected threshold.
    FN = ((y_test == 1) & (y_pred == 0)).sum()
    FP = ((y_test == 0) & (y_pred == 1)).sum()
    TP = ((y_test == 1) & (y_pred == 1)).sum()
    print('Precision: {:.4f}'.format(TP / (TP + FP)))
    print('Recall: {:.4f}'.format(TP / (FN + TP)))
    precision, recall, _thresholds = precision_recall_curve(y_test, recon_err_test)
    area = auc(recall, precision)
    print('AUC_PR: {:.4f}'.format(area))
    print('TP: {}'.format(TP), 'FP: {}'.format(FP), 'FN: {}'.format(FN))
    metrics_dict = {}
    metrics_dict['dataset'] = args.dataset
    metrics_dict['accuracy'] = round(accuracy_score(y_test, y_pred), decimal)
    metrics_dict['precision'] = round(TP / (TP + FP), decimal)
    metrics_dict['recall'] = round(TP / (FN + TP), decimal)
    metrics_dict['f_score'] = round(f1_score(y_test, y_pred), decimal)
    metrics_dict['auc_roc'] = round(roc_auc_score(y_test, recon_err_test), decimal)
    metrics_dict['auc_pr'] = round(auc(recall, precision), decimal)
    metrics_dict['minscore'] = round(min(recon_err_test), decimal)
    metrics_dict['maxscore'] = round(max(recon_err_test), decimal)
    metrics_dict['average'] = round(recon_err_test.mean(), decimal)
    if verbose:
        print(pd.DataFrame(metrics_dict, index=['Results']))
    return metrics_dict, y_pred
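

if __name__ == '__main__':
    # Minimal end-to-end sketch (an added illustration, not part of the
    # original file): exercises shingle() and get_result() on synthetic data.
    # The dataset name and the score/label arrays below are assumptions made
    # for demonstration, not values from this repository.
    from types import SimpleNamespace

    rng = np.random.RandomState(0)

    # Shingle a toy sine wave into 5 overlapping rows.
    windows = shingle(np.sin(np.linspace(0, 10, 100)), dim=5)
    print('shingled shape:', windows.shape)  # -> (5, 96)

    # Synthetic anomaly scores: the 10 anomalous points (label 1) get larger
    # reconstruction errors on average than the 90 normal points (label 0).
    y_test = np.concatenate([np.zeros(90, dtype=int), np.ones(10, dtype=int)])
    recon_err_test = np.concatenate([rng.normal(0.1, 0.05, 90),
                                     rng.normal(0.8, 0.10, 10)])

    metrics, y_pred = get_result(SimpleNamespace(dataset='synthetic'),
                                 y_test, recon_err_test, verbose=True)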