test_1.py
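
# Experiment script: train an SVM on one of several binary classification
# datasets (diabetes, heart, adult), then post-process its predictions with
# the equalized-odds / equal-opportunity method implemented by HardtMethod
# (after Hardt et al., 2016) and compare accuracy and fairness before and after.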
from load_data import load_binary_diabetes_uci, load_heart_uci, load_breast_cancer, load_adult, load_adult_race
from sklearn import svm
from sklearn.metrics import accuracy_score
import numpy as np
from measures import equalized_odds_measure_TP, equalized_odds_measure_FP, equalized_odds_measure_from_pred_TP
import matplotlib.pyplot as plt
from sklearn.model_selection import GridSearchCV
from scipy.optimize import linprog
from hardt import gamma_y_hat, HardtMethod
from scipy.spatial import ConvexHull
seed = 1
np.random.seed(seed)

experiment_number = 0
if experiment_number == 0:
    print('Loading diabetes dataset...')
    dataset_train = load_binary_diabetes_uci()
    dataset_test = load_binary_diabetes_uci()
    sensible_feature = 1  # sex
    print('Different values of the sensible feature', sensible_feature, ':',
          set(dataset_train.data[:, sensible_feature]))
elif experiment_number == 1:
    print('Loading heart dataset...')
    dataset_train = load_heart_uci()
    dataset_test = load_heart_uci()
    sensible_feature = 1  # sex
    print('Different values of the sensible feature', sensible_feature, ':',
          set(dataset_train.data[:, sensible_feature]))
elif experiment_number == 2:
    print('Loading adult (gender) dataset...')
    dataset_train, dataset_test = load_adult(smaller=False)
    sensible_feature = 9  # sex
    print('Different values of the sensible feature', sensible_feature, ':',
          set(dataset_train.data[:, sensible_feature]))
elif experiment_number == 3:
    print('Loading adult (white vs. other races) dataset...')
    dataset_train, dataset_test = load_adult_race(smaller=False)
    sensible_feature = 8  # race
    print('Different values of the sensible feature', sensible_feature, ':',
          set(dataset_train.data[:, sensible_feature]))

if experiment_number in [0, 1]:
    # Use 50% of the examples for training and the rest for testing
    ntrain = 5 * len(dataset_train.target) // 10
    permutation = list(range(len(dataset_train.target)))
    np.random.shuffle(permutation)
    train_idx = permutation[:ntrain]
    test_idx = permutation[ntrain:]
    dataset_train.data = dataset_train.data[train_idx, :]
    dataset_train.target = dataset_train.target[train_idx]
    dataset_test.data = dataset_test.data[test_idx, :]
    dataset_test.target = dataset_test.target[test_idx]
if experiment_number in [2, 3]:
    ntrain = len(dataset_test.target)

# Train an SVM using the training set
print('Grid search...')
grid_search_complete = 1
if grid_search_complete:
    param_grid = [
        {'C': [0.1, 0.5, 1, 10, 100, 1000], 'kernel': ['linear']},
        # {'C': [0.1, 0.5, 1, 10, 100, 1000], 'gamma': ['auto', 0.001, 0.0001], 'kernel': ['rbf']},
    ]
else:
    param_grid = [{'C': [10.0], 'kernel': ['rbf'], 'gamma': ['auto']}]
svc = svm.SVC()
clf = GridSearchCV(svc, param_grid, n_jobs=3)
clf.fit(dataset_train.data, dataset_train.target)
print('Y_hat:', clf.best_estimator_)

# Accuracy
pred = clf.predict(dataset_test.data)
pred_train = clf.predict(dataset_train.data)
print('Accuracy test:', accuracy_score(dataset_test.target, pred))
print('Accuracy train:', accuracy_score(dataset_train.target, pred_train))

# Fairness measure
print('Eq. opp. test: \n',
      equalized_odds_measure_TP(dataset_test, clf, [sensible_feature], ylabel=1))
print('Eq. opp. train: \n',
      equalized_odds_measure_TP(dataset_train, clf, [sensible_feature], ylabel=1))

# gamma_a(Y_hat)
print('Gamma points')
points_pos = gamma_y_hat(dataset_test, clf, [sensible_feature], ylabel=1)[sensible_feature]
points_neg = gamma_y_hat(dataset_test, clf, [sensible_feature], ylabel=1, rev_pred=-1)[sensible_feature]
print(points_pos)
print(points_neg)

# P_a(Y_hat)
print('Convex hulls')
convex_hulls = []
for value in points_neg:
    convex_hulls.append(np.array([[0, 0], points_pos[value], points_neg[value], [1, 1]]))
print(convex_hulls)
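
# Each hull joins (0, 0), the group's gamma point for Y_hat, the point for its
# negation (rev_pred=-1), and (1, 1); in the Hardt et al. construction this
# approximates the set of (FPR, TPR) operating points reachable by randomizing
# Y_hat within that group.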

for idx, convex_hull in enumerate(convex_hulls):
    points = convex_hull
    hull = ConvexHull(points)
    plt.plot(points[:, 0], points[:, 1], 'o')
    for simplex in hull.simplices:
        plt.plot(points[simplex, 0], points[simplex, 1], 'k-')
    # plt.plot(points[hull.vertices, 0], points[hull.vertices, 1], 'r--', lw=2)
    plt.plot(points[hull.vertices[0], 0], points[hull.vertices[0], 1], 'ro')
    plt.plot([el[0] for el in convex_hull], [el[1] for el in convex_hull], '*')

# Algorithm
algorithm = HardtMethod(dataset_train, clf, sensible_feature)
res = algorithm.fit()

if res.status == 0:
    print('Thetas:', res.x[:4])
    print('Alphas:', res.x[4:])
else:
    print('res.x:', res.x)

# res.x = [1.0, 1.0, 1.0, 1.0]
# res.status = 0

if res.status != 0:
    print('res.status != 0:')
else:
    theta_11, theta_01, theta_10, theta_00, alpha1, alpha2, alpha3, alpha4 = res.x
    values_of_sensible_feature = list(set(dataset_train.data[:, sensible_feature]))
    val0 = np.min(values_of_sensible_feature)
    val1 = np.max(values_of_sensible_feature)
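
    # phi_hat_ys below are empirical joint frequencies on the training set,
    # phi_hat_ys ~ P(Y_hat = y, A = s), where y is 1 for a +1 prediction and
    # 0 for a -1 prediction, and s is 1 for group val1 and 0 for group val0.
    # The theta_ys unpacked above are the corresponding randomization
    # parameters returned by HardtMethod's optimization (res.x).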
    tmp = [1.0 if pred_train[idx] == 1 and dataset_train.data[idx, sensible_feature] == val1 else 0.0
           for idx in range(ntrain)]
    phi_hat_11 = np.sum(tmp) / len(tmp)
    tmp = [1.0 if pred_train[idx] == -1 and dataset_train.data[idx, sensible_feature] == val1 else 0.0
           for idx in range(ntrain)]
    phi_hat_01 = np.sum(tmp) / len(tmp)
    tmp = [1.0 if pred_train[idx] == 1 and dataset_train.data[idx, sensible_feature] == val0 else 0.0
           for idx in range(ntrain)]
    phi_hat_10 = np.sum(tmp) / len(tmp)
    tmp = [1.0 if pred_train[idx] == -1 and dataset_train.data[idx, sensible_feature] == val0 else 0.0
           for idx in range(ntrain)]
    phi_hat_00 = np.sum(tmp) / len(tmp)
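
    # Derived (fair) predictions Y_tilde: y_tilde post-processes the SVM's
    # prediction for each example, using its group (1 if A == val1 else 0)
    # and the fitted thetas, on both the training and the test set.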
    fair_pred_train = [float(algorithm.y_tilde(algorithm.model.predict(ex.reshape(1, -1)),
                                               1 if ex[sensible_feature] == val1 else 0))
                       for ex in dataset_train.data]
    fair_pred = [float(algorithm.y_tilde(algorithm.model.predict(ex.reshape(1, -1)),
                                         1 if ex[sensible_feature] == val1 else 0))
                 for ex in dataset_test.data]

    # Accuracy of the fair (post-processed) predictor
    print('Fair Accuracy train:', accuracy_score(dataset_train.target, fair_pred_train))
    print('Fair Accuracy test:', accuracy_score(dataset_test.target, fair_pred))
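
    # Sanity check: y_tilde_equals_y_hat below estimates q = P(Y_tilde = Y_hat)
    # on the training set. Assuming the randomization is independent of whether
    # Y_hat is correct, P(Y_tilde = Y) = acc * q + (1 - acc) * (1 - q)
    # = (2 * acc - 1) * q + 1 - acc, which is the 'Fair Accuracy Theoretical'
    # value printed below (with acc the accuracy of Y_hat on the same split).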
    acc_Y_hat_test = accuracy_score(dataset_test.target, pred)
    acc_Y_hat_train = accuracy_score(dataset_train.target, pred_train)
    y_tilde_equals_y_hat = theta_11 * phi_hat_11 + \
                           theta_01 * phi_hat_01 + \
                           theta_10 * phi_hat_10 + \
                           theta_00 * phi_hat_00
    print('Fair Accuracy Theoretical test:', (2 * acc_Y_hat_test - 1) * y_tilde_equals_y_hat + 1 - acc_Y_hat_test)
    print('Fair Accuracy Theoretical train:', (2 * acc_Y_hat_train - 1) * y_tilde_equals_y_hat + 1 - acc_Y_hat_train)

    # Fairness measure of the fair predictor
    print('Eq. opp. test: \n', equalized_odds_measure_from_pred_TP(dataset_test, fair_pred, [sensible_feature],
                                                                   ylabel=1))
    print('Eq. opp. train: \n', equalized_odds_measure_from_pred_TP(dataset_train, fair_pred_train, [sensible_feature],
                                                                    ylabel=1))
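
    # Check the equal-opportunity constraint of the LP on the training data.
    # psi_hat_1as is the empirical rate of Y_hat = 1 in group a (0 for val0,
    # 1 for val1) among examples with true label s (1 for +1, 0 for -1);
    # alpha and beta are therefore the per-group true positive rates of Y_hat.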
    print('Equal Opportunity constraint:')
    tmp = [1.0 if pred_train[idx] == 1 else 0.0 for idx in range(ntrain)
           if dataset_train.data[idx, sensible_feature] == val0 and dataset_train.target[idx] == 1]
    psi_hat_101 = np.sum(tmp) / len(tmp)
    tmp = [1.0 if pred_train[idx] == 1 else 0.0 for idx in range(ntrain)
           if dataset_train.data[idx, sensible_feature] == val1 and dataset_train.target[idx] == 1]
    psi_hat_111 = np.sum(tmp) / len(tmp)
    alpha = psi_hat_101
    beta = psi_hat_111
    ret = theta_11 * ((1 - 2 * beta) * beta) + \
          theta_10 * ((2 * alpha - 1) * alpha) + \
          theta_01 * ((1 - 2 * beta) * (1 - beta)) + \
          theta_00 * ((2 * alpha - 1) * (1 - alpha))
    print(ret, '=', alpha - beta)
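
    # Operating point of the derived predictor in each group, estimated from
    # the training statistics: xA0/xA1 should be its TPR in groups val0/val1
    # (horizontal lines; TPR is on the y-axis) and yA0/yA1 its FPR
    # (vertical lines), overlaid on the convex hulls plotted above.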
    # Plot of the picked Theta
    xA0 = (2 * alpha - 1) * (theta_10 * alpha + theta_00 * (1 - alpha)) + 1 - alpha
    xA1 = (2 * beta - 1) * (theta_11 * beta + theta_01 * (1 - beta)) + 1 - beta
    tmp = [1.0 if pred_train[idx] == 1 else 0.0 for idx in range(ntrain)
           if dataset_train.data[idx, sensible_feature] == val0 and dataset_train.target[idx] == -1]
    psi_hat_100 = np.sum(tmp) / len(tmp)
    tmp = [1.0 if pred_train[idx] == 1 else 0.0 for idx in range(ntrain)
           if dataset_train.data[idx, sensible_feature] == val1 and dataset_train.target[idx] == -1]
    psi_hat_110 = np.sum(tmp) / len(tmp)
    alphai = psi_hat_100
    betai = psi_hat_110
    yA0 = (2 * alphai - 1) * (theta_10 * alphai + theta_00 * (1 - alphai)) + 1 - alphai
    yA1 = (2 * betai - 1) * (theta_11 * betai + theta_01 * (1 - betai)) + 1 - betai

    # plt.plot([x], [y], 'ro')
    plt.axhline(xA0, color='b')
    plt.axhline(xA1, color='r')
    plt.axvline(yA0, color='b')
    plt.axvline(yA1, color='r')
    plt.xlabel('FPR')
    plt.ylabel('TPR')
    plt.show()