-
Notifications
You must be signed in to change notification settings - Fork 13
/
prepare.sh
executable file
·96 lines (87 loc) · 2.94 KB
/
prepare.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
#!/usr/bin/env bash
# Create the working directory tree, announcing each directory that has to be
# made. Parents are listed before their children because plain mkdir does not
# create intermediate directories.
for new_dir in data data/ckpts results data/arct data/glove; do
  [ -d "$new_dir" ] && continue
  # The progress message names only the last path component (e.g. "ckpts").
  echo "Making ${new_dir##*/} dir..."
  mkdir "$new_dir"
done
# Download the ARCT train/dev/test splits into data/arct, skipping any split
# whose final CSV is already present. Every step aborts the script on failure
# instead of silently carrying on.

# Fetch URL $1 to path $2.
# The body is written to "$2.part" and renamed only after wget succeeds, so an
# interrupted or failed transfer never leaves a truncated file under the final
# name (the existence checks below would otherwise accept it as complete on
# the next run). A stale .part file is simply overwritten by the next attempt.
# Returns non-zero if the directory cannot be made or wget/mv fails.
download_to() {
  local url=$1 dest=$2
  mkdir -p -- "$(dirname -- "$dest")" || return 1
  wget -O "${dest}.part" "$url" || return 1
  mv -- "${dest}.part" "$dest"
}

if [ ! -f data/arct/train-original.csv ]; then
  echo "Downloading train data..."
  download_to \
    https://github.com/habernal/semeval2018-task12/raw/master/data/train/train-full.txt \
    data/arct/train-original.csv || exit 1
fi

if [ ! -f data/arct/dev-original.csv ]; then
  echo "Downloading dev data..."
  download_to \
    https://github.com/habernal/semeval2018-task12/raw/master/data/dev/dev-full.txt \
    data/arct/dev-original.csv || exit 1
fi

if [ ! -f data/arct/test-original.csv ]; then
  # The test split is published as unlabelled data plus a separate gold file.
  echo "Downloading test data..."
  if [ ! -f data/arct/test-only-data.txt ]; then
    download_to \
      https://github.com/habernal/semeval2018-task12/raw/master/data/test/test-only-data.txt \
      data/arct/test-only-data.txt || exit 1
  fi
  if [ ! -f data/arct/truth.txt ]; then
    download_to \
      https://github.com/habernal/semeval2018-task12-results/raw/master/data/gold/truth.txt \
      data/arct/truth.txt || exit 1
  fi

  # NOTE(review): merge_test_labels.py is presumably what writes
  # data/arct/test-original.csv from the two files above -- confirm.
  # Abort before the cleanup if it fails so the downloaded inputs are kept
  # and the next run can retry the merge without re-downloading.
  echo "Merging test labels..."
  python merge_test_labels.py || exit 1

  echo "Cleaning up redundant files..."
  rm -- data/arct/test-only-data.txt data/arct/truth.txt
fi
# Stage the adversarial ARCT splits in data/arct. A file that is already
# present there is left untouched.
for adv_csv in \
  train-swapped.csv \
  train-adv-negated.csv \
  dev-adv-negated.csv \
  test-adv-negated.csv; do
  [ -f "data/arct/$adv_csv" ] || cp "adversarial_dataset/$adv_csv" "data/arct/$adv_csv"
done
# Download and unpack the GloVe 840B.300d vectors into data/glove.
# Nothing is done when data/arct/glove.npy already exists or the unpacked
# text file is already in place; the download itself is skipped when the zip
# is already present.
if [ ! -f data/arct/glove.npy ] && [ ! -f data/glove/glove.840B.300d.txt ]; then
  if [ ! -f data/glove/glove.840B.300d.zip ]; then
    echo "Downloading GloVe..."
    mkdir -p data/glove || exit 1
    # Download under a temporary name and rename only on success. Writing
    # straight to the final name would let an interrupted transfer leave a
    # truncated zip that the check above accepts on every later run, so the
    # unzip step would keep failing until the file was deleted by hand.
    wget -O data/glove/glove.840B.300d.zip.part \
      http://nlp.stanford.edu/data/glove.840B.300d.zip || exit 1
    mv -- data/glove/glove.840B.300d.zip.part data/glove/glove.840B.300d.zip || exit 1
  fi
  echo "Unzipping Glove..."
  unzip data/glove/glove.840B.300d.zip -d data/glove/ || exit 1
fi
# Build the vocab and GloVe matrix for the BOV experiments. Each helper script
# is run only when its corresponding file under data/arct is missing
# (presumably the file that script writes -- the scripts are not visible here).
[ -f data/arct/vocab.json ] || python build_vocab.py
[ -f data/arct/glove.npy ] || python build_glove.py
# Clean up redundant GloVe files (ARCT GloVe vectors live in the data/arct dir).
#
# data/glove is a directory, so it must be tested with -d; a -f test (regular
# file) is never true for it and the cleanup would silently never run.
# The raw vectors only count as redundant once data/arct/glove.npy exists, so
# the directory is kept when that file is missing (e.g. the build step failed)
# to avoid throwing away the large download.
cleanup_glove_dir() {
  if [ -d data/glove ] && [ -f data/arct/glove.npy ]; then
    echo "Deleting redundant GloVe files..."
    rm -rf -- data/glove
  fi
}
cleanup_glove_dir
echo "Success."