-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathexperiment.py
130 lines (120 loc) · 4.27 KB
/
experiment.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
# -*- coding: utf-8 -*-
import os
import numpy as np
import pandas as pd
import argparse
from sklearn.preprocessing import minmax_scale
from models import build_model
from data.utils import sampling, cal_decision_rate
import warnings
def warn(*args, **kwargs):
pass
warnings.warn = warn
def run_SYN(data_path, algorithm='FOM'):
    """Run the cause-effect direction experiment on a synthetic dataset.

    Every ``.csv`` file in ``data_path`` is assumed to hold one causal pair
    with columns 'x' and 'y' whose ground-truth direction is x-->y, so a
    positive model score counts as a correct decision.

    Args:
        data_path: Directory containing ``*.csv`` files with 'x'/'y' columns.
        algorithm: Model name passed to ``build_model`` (one of ANM, IGCI,
            RECI, FOM).

    Returns:
        Tuple ``(acc, scores)`` where ``acc`` is the accuracy in percent and
        ``scores`` is an ``np.ndarray`` of per-task model scores.

    Raises:
        ValueError: If ``data_path`` contains no ``.csv`` files.
    """
    print('=' * 25)
    print(f'= {algorithm}')
    print('=' * 25)
    files = [
        os.path.join(data_path, file_name)
        for file_name in os.listdir(data_path)
        if file_name.endswith('.csv')
    ]
    # Fail fast on an empty or wrong dataset directory. A raise (not assert)
    # survives python -O.
    if not files:
        raise ValueError(f'no .csv files found in {data_path!r}')
    scores = []
    for i, f in enumerate(files):
        print(f'Task {i + 1}: ')
        print('ground truth: x-->y')
        df = pd.read_csv(f)
        # Subsample to at most 1000 points to keep hgpr tractable.
        x, y = sampling(df['x'], df['y'], 1000)
        # Scale both variables to [-1, 1] and shape them as column vectors.
        x = np.reshape(minmax_scale(x, (-1.0, 1.0)), [-1, 1])
        y = np.reshape(minmax_scale(y, (-1.0, 1.0)), [-1, 1])
        model = build_model(algorithm)
        score = model.predict(x, y)
        scores.append(score)
        # A positive score means the model infers x-->y, the ground truth.
        print('True' if score > 0.0 else 'False')
        print('-' * 25)
    print(f'total num of causal pair: {len(files)}')
    scores = np.array(scores)
    acc = 100.0 * np.count_nonzero(scores > 0.0) / float(len(files))
    print('accuracy: %.3f' % acc)
    return acc, scores
def run_CEP(data_path, algorithm='FOM'):
    """Run the causal-direction experiment on the CEP benchmark pairs.

    Reads ``pair0001.txt`` .. ``pair0100.txt`` together with ``pairmeta.txt``
    from ``data_path``. Pairs with zero meta-weight and a hard-coded list of
    pairs without heteroskedasticity are skipped; accuracy is weighted by the
    per-pair weights from the metadata.

    Args:
        data_path: Directory holding the CEP pair files and ``pairmeta.txt``.
        algorithm: Model name passed to ``build_model`` (one of ANM, IGCI,
            RECI, FOM).

    Returns:
        Tuple ``(acc, fails, (scores, labels, weights))`` where ``acc`` is
        the weighted accuracy, ``fails`` lists 1-based indices of wrongly
        decided pairs, and the final tuple carries the raw model scores,
        ground-truth labels (1 for x-->y, 0 for y-->x) and pair weights.
    """
    print('=' * 25)
    print(f'= {algorithm}')
    print('=' * 25)
    files = [
        os.path.join(data_path, 'pair%s.txt' % str(i).zfill(4))
        for i in range(1, 101)
    ]
    # Column 4 encodes the direction (2 <=> x-->y here) and column 5 the
    # weight. sep=r'\s+' replaces the deprecated delim_whitespace=True
    # (removed in pandas 3.0) with identical whitespace splitting.
    dfMeta = pd.read_csv(os.path.join(data_path, 'pairmeta.txt'),
                         header=None,
                         sep=r'\s+')
    weight = dfMeta[5].values
    dire = dfMeta[4].values
    w = 0    # accumulated weight of correctly decided pairs
    ws = 0   # accumulated weight of all evaluated pairs
    fails = []
    scores = []
    labels = []
    weights = []
    # 1-based ids of causal pairs without heteroskedasticity; excluded.
    delete = [43, 44, 45, 46, 65, 66, 67, 76, 84]
    cnt = 0
    for i, f in enumerate(files):
        if weight[i] != 0 and (i + 1) not in delete:
            print('Task %d' % (i + 1))
            if dire[i] == 2:
                print('ground truth: x-->y')
                labels.append(1)
            else:
                print('ground truth: y-->x')
                labels.append(0)
            ws += weight[i]
            cnt += 1
            weights.append(weight[i])
            df = pd.read_csv(f, header=None, sep=r'\s+')
            x = df[0].values
            y = df[1].values
            # Subsample to at most 1000 points to keep the models tractable.
            x, y = sampling(x, y, 1000)
            x, y = minmax_scale(x), minmax_scale(y)
            x = np.reshape(x, [-1, 1])
            y = np.reshape(y, [-1, 1])
            model = build_model(algorithm)
            score = model.predict(x, y)
            scores.append(score)
            # Positive score <=> predicted x-->y; dire==2 encodes x-->y.
            if (score > 0. and dire[i] == 2) or (score < 0. and dire[i] == 1):
                print('True')
                w += weight[i]
            else:
                print('False')
                fails.append(i + 1)
            print('-' * 25)
    print(f'total num of causal pair: {cnt}')
    acc = w / ws
    print('accuracy: %.3f' % acc)
    return acc, fails, (scores, labels, weights)
if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='Run experiment for causal discovery.')
    parser.add_argument('--seed', type=int, default=1234,
                        help='random seed')
    # choices= makes argparse reject any value other than CEP/SYN instead of
    # silently running nothing.
    parser.add_argument('--experiment_type', type=str, default='CEP',
                        choices=['CEP', 'SYN'],
                        help='experiment type, CEP or SYN')
    parser.add_argument('--dataset', type=str, default='./datasets/CEP/',
                        help='path of dataset')
    parser.add_argument('--algorithm', type=str, default='FOM',
                        help='algorithm name. one of ANM, IGCI, RECI, FOM')
    args = parser.parse_args()
    # Seed NumPy so sampling() and any stochastic model are reproducible.
    np.random.seed(args.seed)
    if args.experiment_type == 'CEP':
        # CEP benchmark dataset
        acc, fails, result = run_CEP(args.dataset, args.algorithm)
        # steps, decision_rate = cal_decision_rate(result[0], result[1], result[2])
        # print(decision_rate)
    elif args.experiment_type == 'SYN':
        # synthetic dataset
        acc, scores = run_SYN(args.dataset, args.algorithm)
        # steps, decision_rate = cal_decision_rate(scores)
        # print(decision_rate)