-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathSpamClassifierLstmPosUniversal.py
105 lines (88 loc) · 4.26 KB
/
SpamClassifierLstmPosUniversal.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
import torch
import torch.nn as nn
from UniversalTagger import UniversalTagger
class SpamClassifierLstmPosUniversal(nn.Module):
    """Spam classifier that feeds every token through a POS-specific LSTM cell.

    Each token of the input sentence is tagged by ``UniversalTagger`` and the
    recurrent state is advanced by the ``nn.LSTMCell`` registered for that
    tag. The final hidden state goes through dropout, a linear layer and a
    sigmoid.

    NOTE: ``forward`` derives the tags from the first sample of the batch only
    and reshapes a single token embedding to ``(batch_size, embedding_size)``,
    so it only works when the batch size is 1.
    """

    # Tag emitted by the tagger -> name of the attribute holding its LSTM cell.
    # The insertion order is also the construction order of the cells, so the
    # order of parameter registration and of random initialisation is fixed.
    _TAG_TO_CELL = {
        'EMPTY': 'lstm_cell_empty',
        'ADJ': 'lstm_cell_adj',
        'ADP': 'lstm_cell_adp',
        'ADV': 'lstm_cell_adv',
        'CONJ': 'lstm_cell_conj',
        'DET': 'lstm_cell_det',
        'NOUN': 'lstm_cell_noun',
        'NUM': 'lstm_cell_num',
        'PRT': 'lstm_cell_prt',
        'PRON': 'lstm_cell_pron',
        'VERB': 'lstm_cell_verb',
        '.': 'lstm_cell_other',
        'X': 'lstm_cell_x',
    }

    # Class-level fallback so that instances created without running this
    # __init__ (e.g. restored from an older pickle) still have a step limit.
    max_seq_len = 100

    def __init__(self, vocab_size, output_size, embedding_matrix, embedding_size, hidden_dim, device, index_mapper,
                 drop_prob, max_seq_len=100):
        """Build the embedding, one LSTM cell per tag, and the output head.

        Args:
            vocab_size: Number of rows of the embedding table.
            output_size: Number of output units of the final linear layer.
            embedding_matrix: Pre-trained embedding weights; converted to a
                float32 tensor and frozen (no gradient).
            embedding_size: Dimension of a token embedding.
            hidden_dim: Size of the LSTM hidden and cell state.
            device: Device the initial hidden state is moved to.
            index_mapper: Object providing ``indices_to_words(indices)``.
            drop_prob: Dropout probability applied before the linear layer.
            max_seq_len: Maximum number of tokens processed per sentence
                (default 100); ``None`` processes the whole sequence.
        """
        super(SpamClassifierLstmPosUniversal, self).__init__()
        self.output_size = output_size
        self.hidden_dim = hidden_dim
        self.device = device
        self.embedding_size = embedding_size
        self.max_seq_len = max_seq_len

        # The table is created first and its weight replaced afterwards; the
        # pre-trained vectors are kept fixed during training.
        self.embedding = nn.Embedding(vocab_size, embedding_size)
        self.embedding.weight = nn.Parameter(
            torch.tensor(embedding_matrix, dtype=torch.float32),
            requires_grad=False,
        )

        # One independent LSTM cell per tag, registered under its usual
        # attribute name so state_dict keys stay the same.
        for cell_attr in self._TAG_TO_CELL.values():
            setattr(self, cell_attr, nn.LSTMCell(embedding_size, hidden_dim))

        self.dropout = nn.Dropout(drop_prob)
        # dense layer
        self.fc = nn.Linear(hidden_dim, output_size)
        # activation function
        self.sigmoid = nn.Sigmoid()

        self.tagger = UniversalTagger()
        self.indexMapper = index_mapper
        # Running count of how often each tag was seen by forward().
        self.tag_counter = {tag: 0 for tag in self.tagger.possible_tags()}

    def forward(self, x, hidden):
        """Run one sentence through the tag-routed LSTM cells.

        Args:
            x: Tensor of token indices, indexed as ``x[sample][position]``.
                Only sample 0 is tagged and embedded step by step.
            hidden: ``(h, c)`` tuple as produced by ``init_hidden``.

        Returns:
            ``(out, hidden)`` where ``out`` holds, per batch row, the sigmoid
            of the last output unit, and ``hidden`` is the final ``(h, c)``.

        Raises:
            NotImplementedError: If the tagger returns a tag that has no
                LSTM cell assigned.
        """
        batch_size = x.size(0)
        x = x.long()

        # Tags are computed for the first sample of the batch only.
        token_indices = x.tolist()[0]
        words = self.indexMapper.indices_to_words(token_indices)
        tags = self.tagger.map_sentence(words)
        embeds = self.embedding(x)

        # Process at most max_seq_len tokens, but never more than the
        # sentence actually contains (shorter inputs no longer fail).
        steps = embeds.size(1)
        if self.max_seq_len is not None:
            steps = min(steps, self.max_seq_len)

        for position, token_embed in enumerate(embeds[0][:steps]):
            tag = tags[position]
            self.tag_counter[tag] += 1
            cell_input = token_embed.view(batch_size, self.embedding_size)

            cell_attr = self._TAG_TO_CELL.get(tag)
            if cell_attr is None:
                print("Unexpected tag:", tag)
                raise NotImplementedError('Unexpected tag!')
            hidden = getattr(self, cell_attr)(cell_input, hidden)

        lstm_out = hidden[0].contiguous().view(-1, self.hidden_dim)
        out = self.dropout(lstm_out)
        out = self.fc(out)
        out = self.sigmoid(out)

        # Keep only the last output unit of every batch row.
        out = out.view(batch_size, -1)
        out = out[:, -1]
        return out, hidden

    def init_hidden(self, batch_size):
        """Return a zero-filled ``(h, c)`` state of shape (batch_size, hidden_dim).

        The tensors take their dtype from the module's first parameter and
        are moved to ``self.device``.
        """
        reference = next(self.parameters())
        state_shape = (batch_size, self.hidden_dim)
        h0 = reference.new_zeros(state_shape).to(self.device)
        c0 = reference.new_zeros(state_shape).to(self.device)
        return h0, c0