-
Notifications
You must be signed in to change notification settings - Fork 18
/
qald_test_to_id.py
112 lines (83 loc) · 3.32 KB
/
qald_test_to_id.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
# Takes the qald test big data and converts it into a format which
# can then be used by one file easily.
#Read the qald file.
#Find out all the relationships and update the relationship file
#idfy all the required paths and store it in a specific json format
'''
data['uri']['question-id']
data['uri']['hop-2-properties'] + data['uri']['hop-1-properties']
data['parsed-data']['path']
'''
import json
import pickle
import data_creator_step2 as dc2
from utils import dbpedia_interface as dbi
from utils import embeddings_interface as ei
from utils import natural_language_utilities as nlutils
ei.__check_prepared__()
dbp = dbi.DBPedia(caching=True)
data = json.load(open('data/data/qald/qald_big_data_test.json'))
def retrive_relations(node):
relation_list = []
path = node['parsed-data']['path']
print("**path is ", path)
if path != [-1]:
# path = [+abc,-pqr]
# if path[0][0] == '+':
# path[0][0] = '-'
# elif path[0][0] == '-':
# path[0][0] = '+'
p = [p[1:] for p in path]
relation_list = relation_list + p
hop1 = [h[-1] for h in node['uri']['hop-1-properties']]
relation_list = relation_list + hop1
for h in node['uri']['hop-2-properties']:
relation_list.append(h[1])
relation_list.append(h[-1])
return list(set(relation_list))
def idfy_path(paths,relation_dict,dbp):
for index,path in enumerate(paths):
paths[index],relation_dict = dc2.idfy_path(path,relation_dict,dbp)
return paths,relation_dict
rel = []
for node in data:
r = retrive_relations(node)
rel = rel + r
rel = list(set(rel))
relation_dict = pickle.load(open('data/data/common/relations.pickle', 'rb'))
for r in rel:
_,relation_dict = dc2.update_relation_dict(relation=r,relation_dict=relation_dict,dbp=dbp)
new_data = []
for node in data:
hop1properties,relation_dict = idfy_path(paths=node['uri']['hop-1-properties'],relation_dict=relation_dict,dbp=dbp)
hop2properties,relation_dict = idfy_path(paths=node['uri']['hop-2-properties'],relation_dict=relation_dict,dbp=dbp)
path = node['parsed-data']['path']
if path != [-1]:
# path = [+abc,-pqr]
# path = node['parsed-data']['node']['path']
new_path = []
for p in path:
p_temp,relation_dict = dc2.idfy_const(p[1:],relation_dict,dbp)
new_path.append(p[0])
new_path.append(p_temp)
# p,relation_dict = dc2.idfy_const(path[1:],relation_dict,dbp)
path = new_path
data_s = {
'uri':{
'hop-1-properties' : hop1properties,
'hop-2-properties' : hop2properties,
'question-id' : [int(id) for id in list(ei.vocabularize(nlutils.tokenize(node['parsed-data']['corrected_question'])))]
},
'parsed-data':{
'path' : path,
'entity' : node['parsed-data']['entity'],
'sparql_query' : node['parsed-data']['sparql_query'],
'constraints' : node['parsed-data']['constraints'],
'corrected_question' : node['parsed-data']['corrected_question'],
'node': node['parsed-data']
}
}
new_data.append(data_s)
#Save the relation dict
pickle.dump(relation_dict,open('data/data/common/relations.pickle', 'wb+'))
json.dump(new_data,open('data/data/qald/qald_id_big_data_test.json','w+'))