-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathFullTagger.py
96 lines (91 loc) · 2.51 KB
/
FullTagger.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
import nltk
from nltk.data import load
from Tagger import AbstractTagger
class FullTagger(AbstractTagger):
    """Tagger that labels tokens with lightly normalised NLTK POS tags.

    Tags produced by ``nltk.pos_tag`` are passed through a fixed table:
    most tags are kept as they are, the comparative/superlative adverb
    tags are folded into ``'RB'``, ``'-'`` becomes ``'EMPTY'`` and every
    tag the table does not know is reported as ``'OTHER'``.
    """

    # Tags that are reported exactly as they were received.
    # NOTE(review): plain 'VB' is not listed, so it is reported as
    # 'OTHER' -- confirm that this is intentional.
    _KEPT_TAGS = frozenset((
        'VBN', 'VBZ', 'VBG', 'VBP', 'VBD', 'MD',
        'NN', 'NNPS', 'NNP', 'NNS',
        'JJS', 'JJR', 'JJ',
        'RB',
        'CD', 'IN', 'PDT', 'CC', 'EX', 'POS', 'RP', 'FW', 'DT', 'UH', 'TO',
        'PRP', 'PRP$', '$',
        'WP', 'WP$', 'WDT', 'WRB',
    ))

    # Tags that are reported under a different label.
    _REPLACED_TAGS = {
        'RBR': 'RB',
        'RBS': 'RB',
        '-': 'EMPTY',
    }

    # Label used for every tag found in neither table above.
    _FALLBACK_TAG = 'OTHER'

    def __init__(self):
        """Initialise the base tagger and precompute the possible tags."""
        # Explicit base-class call kept as in the original; AbstractTagger
        # is defined in another module, so nothing is assumed about it.
        AbstractTagger.__init__(self)
        tag_list = load('help/tagsets/upenn_tagset.pickle').keys()
        # Several source tags collapse onto the same label, so duplicates
        # are dropped via dict keys (first-seen order on Python 3.7+).
        self.possibleTagsList = list(
            dict.fromkeys(map(self.__map_tag, tag_list))
        )

    def possible_tags(self):
        """Return the list of labels computed in ``__init__``."""
        return self.possibleTagsList

    def map_sentence(self, sentence):
        """Return one mapped tag per entry that ``nltk.pos_tag`` yields.

        ``sentence`` is handed to ``nltk.pos_tag`` unchanged -- presumably
        a list of token strings; verify against the callers.
        """
        return [
            self.__map_tag(tagged[1])  # tagged[0] is the token, [1] its tag
            for tagged in nltk.pos_tag(sentence)
        ]

    def __map_tag(self, tag):
        """Translate a single POS tag into the label this tagger reports."""
        if tag in self._KEPT_TAGS:
            return tag
        return self._REPLACED_TAGS.get(tag, self._FALLBACK_TAG)