eval.py
"""
The functions in this module compute the mean F1-score evaluation metric,
perform cross-validation, grid search and make predictions from inside a
cross-validation loop for model stacking.
project: Kaggle WISE 2014 Greek Media competition
author: David Thaler
"""
import numpy as np
import pandas as pd
from sklearn.cross_validation import cross_val_score as cvs
from sklearn.cross_validation import KFold
from sklearn.metrics import f1_score
from sklearn.utils import as_float_array
import models
def dvByLabel(y, dv):
    """
    Find the median decision values for the positive and negative instances
    of each class.
    This function is used for exploratory data analysis/model evaluation.
    Params:
      y - the 0-1 label matrix; a numpy array
      dv - array of decision values returned from a model; a numpy array
    Returns:
      a pandas data frame of size (# classes) x 3 with the median
      decision value on negative instances for class k in position [k, 0],
      the median positive decision value in [k, 1], and the count
      of class k positives in [k, 2]
    """
    dv = as_float_array(dv)
    result = np.zeros((y.shape[1], 2))
    for k in range(y.shape[1]):
        result[k, 0] = np.median(dv[y[:, k] == 0, k])
        result[k, 1] = np.median(dv[y[:, k] == 1, k])
    result = pd.DataFrame(result)
    result['tot'] = y.sum(0)
    return result
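
# Illustrative usage sketch (not in the original module; variable names are
# hypothetical): after out-of-fold decision values are available, this frame
# shows how well each class's positives and negatives are separated.
#
#   pred, dv = predictCV(model, x_train, y_train, k=3)
#   sep = dvByLabel(y_train, dv)
#   print(sep.head())
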
def getScoreFrame(y, pred):
    """
    Find the F1-score and its components, as well as the count of total
    positives, for each class.
    This function is used for exploratory data analysis/model evaluation.
    Params:
      y - training labels; a 0-1 numpy array
      pred - predictions; a 0-1 numpy array of the same size as y
    Returns:
      A Pandas data frame of size (# of classes) x 5
    """
    fscores = np.zeros(y.shape[1])
    tot = y.sum(0)
    tp = (y * pred).sum(0)
    fp = (y < pred).sum(0)
    fn = (y > pred).sum(0)
    for k in range(y.shape[1]):
        fscores[k] = f1_score(y[:, k], pred[:, k], average='binary')
    out = pd.DataFrame({'tot': tot})
    out['tp'] = tp
    out['fp'] = fp
    out['fn'] = fn
    out['f1'] = fscores
    return out
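
# Illustrative usage sketch (hypothetical names, not in the original module):
# compare out-of-fold predictions to the training labels class by class to
# spot labels where the model is weak.
#
#   frame = getScoreFrame(y_train, pred)
#   print(frame.describe())
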
def grid(model, x, y, cvals, t1vals, t2vals, k=3):
    """
    Performs grid search for hyperparameters used in the uniformOVA model.
    NB: This was re-implemented (vs. using grid search from sklearn) because
    sklearn grid search would try every (C, t1, t2) tuple, retraining the
    SVC with the same C value t1 x t2 times. This trains the SVC once per
    C value and then searches over the (t1, t2) pairs, which is a fast
    thresholding operation.
    Params:
      model - a model that has parameters C, t1 and t2
      x - training features
      y - training labels
      cvals - list or array of C parameter values for linear SVC
      t1vals - list or array of t1 (global threshold) values
      t2vals - list or array of t2 (proximity to max dv threshold) values
      k - # of cross-validation folds
    Returns:
      a list with every (C, t1, t2) tuple, along with its mean F1-score
    """
    result = []
    for c in cvals:
        model.c = c
        (pred, dv) = predictCV(model, x, y, k)
        for t1val in t1vals:
            for t2val in t2vals:
                pred = models.repredict(dv, t1val, t2val)
                score = f1_score(y, pred, average='samples')
                result.append((c, t1val, t2val, score))
    return result
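
# Illustrative usage sketch (hypothetical names and value ranges, not in the
# original module): run the custom grid search and keep the best-scoring
# hyperparameter tuple.
#
#   results = grid(model, x_train, y_train,
#                  cvals=[0.5, 1.0, 2.0],
#                  t1vals=[-0.4, -0.2, 0.0],
#                  t2vals=[0.5, 1.0, 1.5], k=3)
#   best_c, best_t1, best_t2, best_score = max(results, key=lambda r: r[3])
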
def predictCV(model, x, y, k=3):
    """
    Makes predictions over the training set using a cross-validation loop
    such that the predictions for each instance are from the model trained
    when that instance was held out.
    Params:
      model - The model used. Model state will be changed by training.
      x - training features
      y - training labels
      k - # of CV folds
    Returns: a 2-tuple
      pred - a numpy array of 0-1 predictions, similar to what gets submitted.
      dvs - a numpy array of the decision values of the SVC
    """
    folds = KFold(y.shape[0], k)
    pred = 0 * y
    # Use a float array for the decision values so they are not truncated
    # if y is an integer array.
    dvs = np.zeros(y.shape)
    for train, val in folds:
        model.fit(x[train], y[train])
        dvs[val] = model.decision_function(x[val])
        pred[val] = model.predict(x[val])
    return (pred, dvs)
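
# Illustrative usage sketch (hypothetical names, not in the original module):
# the out-of-fold predictions and decision values returned here can be used
# as features for a second-stage model when stacking.
#
#   pred, dv = predictCV(model, x_train, y_train, k=3)
#   print(meanF1array(y_train, pred))   # out-of-fold mean F1
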
def cvk(model, x, y, k=3):
    """
    Computes mean F1-score for a provided model and training data.
    Params:
      model - the model to use in cross-validation
      x - training features
      y - training labels
      k - number of cross-validation folds
    Returns:
      a numpy array of the mean F1-scores for each fold
    """
    return cvs(model, x, y, scoring=meanF1scorer, cv=k, n_jobs=3)
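
# Illustrative usage sketch (hypothetical names, not in the original module):
#
#   fold_scores = cvk(model, x_train, y_train, k=3)
#   print(fold_scores.mean(), fold_scores.std())
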
def meanF1array(y, pred):
    """
    Function for computing mean F1-score given ground truth labels
    and a same-sized numpy array of 0-1 predictions.
    Params:
      y - 0-1 array of ground truth labels;
          a numpy array of size (# instances) x (# classes)
      pred - 0-1 array of predictions of same size as y
    Returns:
      mean F1-score for the predictions, pred, given the labels, y
    """
    row_f1 = np.zeros((y.shape[0]))
    for k in range(y.shape[0]):
        tp = (pred[k] * y[k]).sum()
        fp = (pred[k] > y[k]).sum()
        fn = (pred[k] < y[k]).sum()
        precision = tp / (tp + fp + 1e-9)
        recall = tp / (tp + fn + 1e-9)
        row_f1[k] = 2 * precision * recall / (precision + recall + 1e-9)
    return np.mean(row_f1)
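
# Tiny worked example (a sketch, not in the original module; values are
# approximate because of the 1e-9 smoothing terms):
#
#   y    = np.array([[1, 0, 1], [0, 1, 0]])
#   pred = np.array([[1, 0, 0], [0, 1, 0]])
#   row 0: tp=1, fp=0, fn=1 -> precision=1.0, recall=0.5, F1 ~ 0.667
#   row 1: tp=1, fp=0, fn=0 -> F1 ~ 1.0
#   meanF1array(y, pred)    # ~ 0.833
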
def meanF1scorer(model, x, y):
    """
    A scoring function for mean F1-score with the right signature
    to use as the 'scoring' parameter in
    sklearn.cross_validation.cross_val_score.
    Params:
      model - the model to use for prediction
      x - features to use for prediction
      y - ground truth labels for the examples in x;
          a numpy 0-1 array of size (# instances) x (# classes)
    Returns:
      the mean F1-score for the predictions on x
    """
    pred = model.predict(x)
    return meanF1array(y, pred)
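
if __name__ == '__main__':
    # Minimal self-check sketch (not part of the original module): verify
    # meanF1array against a tiny hand-computed example.
    # Row 0: tp=1, fp=0, fn=1 -> F1 = 2/3; row 1 is perfect -> F1 = 1,
    # so the mean F1 should be about 0.833.
    y_toy = np.array([[1, 0, 1], [0, 1, 0]])
    pred_toy = np.array([[1, 0, 0], [0, 1, 0]])
    print(meanF1array(y_toy, pred_toy))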