-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathSAR_Indexer.py
52 lines (38 loc) · 1.83 KB
/
SAR_Indexer.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
import argparse
import pickle
import sys
import time
from SAR_lib import SAR_Project
if __name__ == "__main__":
    # Command-line entry point: build a SAR_Project index from a directory of
    # news in JSON format, pickle the project object to disk, and report how
    # long indexing and saving took.
    cli = argparse.ArgumentParser(description='Index a directory with news in json format.')

    # Required positional arguments, registered in command-line order.
    for arg_name, arg_help in (
        ('newsdir', 'directory with the news.'),
        ('index', 'name of the file to save the project object.'),
    ):
        cli.add_argument(arg_name, metavar=arg_name, type=str, help=arg_help)

    # Optional boolean switches; each one is False unless given on the
    # command line. The destination name is the long flag without its dashes.
    switches = (
        ('-S', '--stem', 'compute stem index.'),
        ('-P', '--permuterm', 'compute permuterm index.'),
        ('-M', '--multifield', 'compute index for all the fields.'),
        ('-O', '--positional', 'compute positional index.'),
        # Arguments added in ALGORITMICA (translated from the original note).
        ('-X', '--approximation', 'compute approximate vocabulary'),
        ('-I', '--trie', 'use trie data structure for vocab'),
    )
    for short_flag, long_flag, flag_help in switches:
        cli.add_argument(short_flag, long_flag, dest=long_flag[2:],
                         action='store_true', default=False, help=flag_help)

    options = cli.parse_args()
    source_dir = options.newsdir
    target_path = options.index

    project = SAR_Project()

    # Every parsed option (including newsdir and index) is forwarded to
    # index_dir as a keyword argument, alongside the directory itself.
    started = time.time()
    project.index_dir(source_dir, **vars(options))
    indexed = time.time()

    # Persist the whole project object so it can be reloaded later.
    with open(target_path, 'wb') as out:
        pickle.dump(project, out)
    saved = time.time()

    project.show_stats()
    print("Time indexing: %2.2fs." % (indexed - started))
    print("Time saving: %2.2fs." % (saved - indexed))
    print()