@@ -19,16 +19,24 @@ class EmbedMixture(chainer.Chain):
uninterpretable until you measure the words most similar to this topic
vector.
+ :math:`e=\sum_{j=0}^{n\_topics} c_j \cdot \vec{T_j}`
+
+ This is usually paired with regularization on the weights `c_j`. If using
+ a Dirichlet prior with low alpha, these weights will be sparse.
+
Args:
n_documents (int): Total number of documents
n_topics (int): Number of topics per document
n_dim (int): Number of dimensions per topic vector (should match word
vector size)
Attributes:
- weights (~chainer.links.EmbedID): Unnormalized topic weights. To
- normalize these weights, use `F.softmax(weights)`.
- factors (~chainer.links.Parameter): Topic vector matrix.
+ weights (~chainer.links.EmbedID): Unnormalized topic weights
+ (:math:`c_j`). To normalize these weights, use
+ `F.softmax(weights)`.
+ factors (~chainer.links.Parameter): Topic vector matrix (:math:`T_j`)
+
+ .. seealso:: :func:`lda2vec.dirichlet_likelihood`
"""
def __init__(self, n_documents, n_topics, n_dim):
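To make the docstring's equation concrete, here is a minimal NumPy sketch of the mixture it describes, together with a low-alpha Dirichlet prior term in the spirit of `lda2vec.dirichlet_likelihood`; the shapes, values, and the prior formula are illustrative assumptions, not the class's actual implementation.

```python
import numpy as np

# Sketch of the mixture e = sum_j softmax(c)_j * T_j from the docstring.
# Shapes follow the constructor arguments; values are random placeholders.
n_documents, n_topics, n_dim = 4, 3, 5
rng = np.random.RandomState(0)
weights = rng.randn(n_documents, n_topics)  # unnormalized weights c_j
factors = rng.randn(n_topics, n_dim)        # topic vectors T_j

# Softmax over the topic axis turns raw weights into mixture proportions.
proportions = np.exp(weights - weights.max(axis=1, keepdims=True))
proportions /= proportions.sum(axis=1, keepdims=True)

# Each document embedding is a convex combination of the topic vectors.
doc_vectors = proportions.dot(factors)      # shape (n_documents, n_dim)

# Assumed illustration of a Dirichlet prior with low alpha: the
# (alpha - 1) * log(p_j) terms reward sparse proportions when alpha < 1.
alpha = 0.1
dirichlet_loglik = ((alpha - 1.0) * np.log(proportions)).sum()
```

With `alpha < 1`, the factor `(alpha - 1)` is negative, so the log-likelihood grows when most proportions sit near zero, which is why the docstring notes that a low-alpha Dirichlet prior yields sparse weights.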
@@ -52,7 +60,16 @@ def to_cpu(self):
super(EmbedMixture, self).to_cpu()
def __call__(self, doc_ids):
- """
+ """ Given an array of document integer indices, returns a vector
+ for each document. The vector is composed of topic weights projected
+ onto topic vectors.
+
+ Args:
+ doc_ids (~chainer.Variable): One-dimensional batch of document IDs
+
+ Returns:
+ ~chainer.Variable: Two-dimensional batch of embeddings, one
+ row per document.
"""
# (batchsize, ) --> (batchsize, logweights)
w = self.weights(doc_ids)
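A hypothetical usage sketch of the `__call__` documented above; the import path, corpus sizes, and IDs are assumptions for illustration, not taken from the repository.

```python
import numpy as np
from lda2vec.embed_mixture import EmbedMixture  # assumed import path

# Map a batch of document IDs to their mixture embeddings.
mixture = EmbedMixture(n_documents=100, n_topics=10, n_dim=300)
doc_ids = np.array([0, 7, 42], dtype=np.int32)  # one-dimensional batch of IDs
doc_vectors = mixture(doc_ids)                  # a chainer.Variable
print(doc_vectors.data.shape)                   # (3, 300): one row per document
```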