-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathdataprep.py
More file actions
38 lines (25 loc) · 1.05 KB
/
dataprep.py
File metadata and controls
38 lines (25 loc) · 1.05 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
# -*- coding: utf-8 -*-
from gensim.models import doc2vec
class DataPrepare:
    """Train a gensim Doc2Vec model on a list of sentences and embed text.

    Intended call order: construct with the raw sentences, call
    ``convertSentence()`` to tokenise and tag them, call ``createModel()``
    to train, then call ``convertToVector(text)`` for any text to embed.
    """

    def __init__(self, s):
        """Store the raw corpus.

        :param s: sequence of sentences (strings); each one becomes a document.
        """
        self.model = None   # assigned by createModel()
        self.split_s = []   # tagged documents, filled by convertSentence()
        self.s = s

    def convertSentence(self):
        """Tokenise each sentence on whitespace and tag it ``Tweet<index>``.

        The result is stored in ``self.split_s``. The list is rebuilt from
        ``self.s`` on every call, so calling this method more than once does
        not add duplicate documents with colliding tags.
        """
        # TaggedDocument is the supported class name; LabeledSentence is only
        # a deprecated alias of it and is absent from newer gensim releases.
        self.split_s = [
            doc2vec.TaggedDocument(words=sentence.split(),
                                   tags=["Tweet" + str(index)])
            for index, sentence in enumerate(self.s)
        ]

    def createModel(self):
        """Train Doc2Vec on ``self.split_s`` and keep it in ``self.model``.

        Prints the tagged corpus before training and the shape of the first
        document vector afterwards (debug output kept from the original).
        ``convertSentence()`` must have been called first.
        """
        print (self.split_s)
        # NOTE(review): `size=` and `.docvecs` are the pre-4.0 gensim names
        # (`vector_size=` / `.dv` later) -- confirm against the installed
        # gensim version before upgrading.
        self.model = doc2vec.Doc2Vec(self.split_s, size=100, window=3,
                                     min_count=1, workers=5)
        print (self.model.docvecs[0].shape)

    def convertToVector(self, sentence):
        """Infer a document vector for ``sentence`` with the trained model.

        :param sentence: text to embed; it is split on whitespace into the
            token list handed to ``infer_vector``.
        :returns: whatever ``self.model.infer_vector`` returns for those tokens.

        ``createModel()`` must have been called first; otherwise
        ``self.model`` is still ``None`` and the attribute lookup fails.
        """
        # Use the caller's text. The previous version ignored `sentence` and
        # always inferred from a fixed ["sinema", "bileti"] token list.
        return self.model.infer_vector(sentence.split())
# Demo run executed at import time: build the helper from two sample tweets,
# tag the corpus, train the model, then request vectors for two query strings.
d = DataPrepare([
    "Referandumda ne olacak?",
    "Bedava sinema bileti",
])
d.convertSentence()
d.createModel()
# The generator is consumed left to right, so v1 is computed before v2.
v1, v2 = (
    d.convertToVector(query)
    for query in ("bileti sinema", "Referandumda olacak")
)