-
Notifications
You must be signed in to change notification settings - Fork 2
/
bli_sup.sh
126 lines (98 loc) · 3.96 KB
/
bli_sup.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
# Bilingual lexicon induction for a source-target pair, trained through a
# pivot language.
#
# Usage: bli_sup.sh SRC PIVOT TGT
#   SRC   - source language code (used for both the training and test pair)
#   PIVOT - target language of the training pair SRC-PIVOT
#   TGT   - target language of the evaluated pair SRC-TGT
#
# -e aborts on the first failing command; -u makes a reference to an unset
# variable an error instead of an empty string.
set -eu

# Every path and download URL below is built from the three language codes,
# so a missing argument would otherwise only show up later as a malformed
# file name. Extra arguments are ignored, as before.
if [ "$#" -lt 3 ]; then
  printf 'Usage: %s SRC PIVOT TGT\n' "${0##*/}" >&2
  exit 2
fi

s1=$1 # source language of the training pair
t1=$2 # pivot language (target of the training pair)
s2=$1 # source language of the test pair; same argument as s1
t2=$3 # target language of the test pair
printf 'Starting %s-%s induction, using %s as pivot\n' "${s2}" "${t2}" "${t1}"
# Create the working directories for downloads, results and extracted
# queries. `mkdir -p` succeeds when a directory already exists, so no
# existence test is needed beforehand; the paths are quoted so the language
# codes are never word-split or glob-expanded.
mkdir -p -- \
  data \
  "res_sup/${s2}-${t2}" \
  "query_sup/${s1}-${t1}" \
  "query_sup/${s2}-${t2}"
#######################################
# Download one dictionary file unless it is already present.
# The file is fetched into data/.partial/ and moved to its final path only
# after wget succeeds. An interrupted transfer therefore stays in the staging
# directory, where `wget -c` resumes it on the next run, instead of being
# mistaken for a finished download by the existence check.
# Arguments: $1 - destination path; its basename is the remote file name
# Returns:   0 if the file is in place afterwards, otherwise the failing
#            command's status
#######################################
fetch_dictionary() {
  if [ -f "$1" ]; then
    return 0
  fi
  mkdir -p data/.partial || return
  wget -c "https://dl.fbaipublicfiles.com/arrival/dictionaries/${1##*/}" \
    -P data/.partial/ || return
  mv -- "data/.partial/${1##*/}" "$1"
}

# 0-5000 and 5000-6500 splits for the source-pivot pair ...
dico_train="data/${s1}-${t1}.0-5000.txt"
dico_val1="data/${s1}-${t1}.5000-6500.txt"
# ... and for the source-target pair.
dico_test="data/${s2}-${t2}.0-5000.txt"
dico_val2="data/${s2}-${t2}.5000-6500.txt"

for dico in "${dico_train}" "${dico_test}" "${dico_val1}" "${dico_val2}"; do
  fetch_dictionary "${dico}"
done
#######################################
# Download one wiki.<lang>.vec embedding file unless it is already present.
# The file is fetched into data/.partial/ and moved to its final path only
# after wget succeeds. An interrupted transfer therefore stays in the staging
# directory, where `wget -c` resumes it on the next run, instead of being
# mistaken for a finished download by the existence check.
# Arguments: $1 - destination path; its basename is the remote file name
# Returns:   0 if the file is in place afterwards, otherwise the failing
#            command's status
#######################################
fetch_embedding() {
  if [ -f "$1" ]; then
    return 0
  fi
  mkdir -p data/.partial || return
  wget -c "https://dl.fbaipublicfiles.com/fasttext/vectors-wiki/${1##*/}" \
    -P data/.partial/ || return
  mv -- "data/.partial/${1##*/}" "$1"
}

# Embeddings for both sides of the training pair and of the test pair.
# s1 and s2 come from the same argument, so src_emb2 names the same file as
# src_emb1 and its fetch is a no-op.
src_emb1="data/wiki.${s1}.vec"
tgt_emb1="data/wiki.${t1}.vec"
src_emb2="data/wiki.${s2}.vec"
tgt_emb2="data/wiki.${t2}.vec"

for emb in "${src_emb1}" "${tgt_emb1}" "${src_emb2}" "${tgt_emb2}"; do
  fetch_embedding "${emb}"
done
# Aligning embeddings
#
# Alignment runs whenever either output file is missing. Testing only for
# the output directory is not enough: the directory is created before
# align.py starts, so a failed run would leave it behind and alignment would
# be skipped on every later invocation.
# Assumes align.py writes exactly to the --output_src/--output_tgt paths
# (they are the paths the query extraction step reads) - TODO confirm.
#
# NOTE(review): the target-side embeddings are passed as --src_emb and the
# source-side ones as --tgt_emb, with the two output paths swapped to match.
# Kept as found; confirm this agrees with the direction of the dictionaries.
output_src1="alignment_sup/${s1}-${t1}/${s1}.vec"
output_tgt1="alignment_sup/${s1}-${t1}/${t1}.vec"
if [ ! -f "${output_src1}" ] || [ ! -f "${output_tgt1}" ]; then
  mkdir -p -- "alignment_sup/${s1}-${t1}"
  python3 align.py --src_emb "${tgt_emb1}" --tgt_emb "${src_emb1}" \
    --dico_train "${dico_train}" --dico_test "${dico_val1}" \
    --output_src "${output_tgt1}" --output_tgt "${output_src1}"
fi

output_src2="alignment_sup/${s2}-${t2}/${s2}.vec"
output_tgt2="alignment_sup/${s2}-${t2}/${t2}.vec"
if [ ! -f "${output_src2}" ] || [ ! -f "${output_tgt2}" ]; then
  mkdir -p -- "alignment_sup/${s2}-${t2}"
  python3 align.py --src_emb "${tgt_emb2}" --tgt_emb "${src_emb2}" \
    --dico_train "${dico_test}" --dico_test "${dico_val2}" \
    --output_src "${output_tgt2}" --output_tgt "${output_src2}"
fi
# Query extraction
#
# Builds the train queries from the aligned source-pivot embeddings and the
# test queries from the aligned source-target embeddings. Each extraction is
# skipped when its output file already exists. Apart from the embeddings,
# dictionary and output file, the two calls differ only in --testing_query
# and --discard_empty_query (both false for train, both true for test).

train_path="query_sup/${s1}-${t1}/train"
[ -f "${train_path}" ] || python3 single_query_extract.py \
  --src_emb "${output_src1}" \
  --tgt_emb "${output_tgt1}" \
  --filename "${train_path}" \
  --dico "${dico_train}" \
  --query_size 10 \
  --query_relevance_type 'binary' \
  --add_csls_coord true \
  --k_csls 10 \
  --testing_query false \
  --add_word_coord false \
  --add_query_coord false \
  --discard_empty_query false

test_path="query_sup/${s2}-${t2}/test"
[ -f "${test_path}" ] || python3 single_query_extract.py \
  --src_emb "${output_src2}" \
  --tgt_emb "${output_tgt2}" \
  --filename "${test_path}" \
  --dico "${dico_test}" \
  --query_size 10 \
  --query_relevance_type 'binary' \
  --add_csls_coord true \
  --k_csls 10 \
  --testing_query true \
  --add_word_coord false \
  --add_query_coord false \
  --discard_empty_query true
# BLI induction
#
# Trains the ranking model on the pivot-pair queries and writes its output
# under res_sup/<src>-<tgt>/<pivot>/. Unlike the earlier stages this one has
# no skip guard, so it runs on every invocation.
# NOTE(review): the test queries are passed as both --vali_path and
# --test_path; confirm that validating on the test set is intended.
output_dir1="res_sup/${s2}-${t2}/${t1}/approx_ndcg_loss_group_2"
python3 tf_ranking_libsvm.py \
  --train_path "${train_path}" \
  --vali_path "${test_path}" \
  --test_path "${test_path}" \
  --output_dir "${output_dir1}" \
  --group_size 2 \
  --loss "approx_ndcg_loss" \
  --num_train_steps 100000 \
  --num_features 11 \
  --query_relevance_type 'binary' \
  --query_size 10