# topicmodelvb_v2.py
# Forked from blei-lab/onlineldavb.
import sys, re, time, string
import numpy as n
import matplotlib.pyplot as plt
from utils import *
from corpus import *
n.random.seed(100000001)
meanchangethresh = 0.001
class _TopicModel:
"""
Skeleton for SVI training of topic models (LDA 1/K or SB-LDA).
"""
def __init__(self, vocab, K, topicpath, D, alpha0, eta, tau0, kappa):
"""
Arguments:
K: Number of topics
vocab: A set of words to recognize. When analyzing documents, any word
not in this set will be ignored.
D: Total number of documents in the population.
        topicpath: Path to pre-trained topics' Dirichlet parameters, or None to
            initialize the topics randomly.
        alpha0: For LDA 1/K, alpha = alpha0/K is the hyperparameter of the prior on
            topic proportions theta_d. For SB-LDA, alpha0 governs the stick-breaking
            prior Beta(1, alpha0).
eta: Hyperparameter for prior on topics beta
tau0: A (positive) learning parameter that downweights early iterations
kappa: Learning rate: exponential decay rate---should be between
(0.5, 1.0] to guarantee asymptotic convergence.
Remarks:
User should make sure the topics from topicpath are compatible with
K and vocab.
"""
t0 = time.time()
self._vocab, self._idxtoword = make_vocab(vocab)
self._K = K
self._W = len(self._vocab)
self._D = D
self._eta = eta
self._tau0 = tau0 + 1
self._kappa = kappa
self._updatect = 0
        # Initialize the variational distribution q(beta|lambda): randomly if
        # topicpath is None, otherwise load the pre-trained topics from disk.
if (topicpath is None):
self._lambda = 1*n.random.gamma(100., 1./100., (self._K, self._W))
else:
self._lambda = n.loadtxt(topicpath)
assert self._lambda.shape==(self._K,self._W), "Wrong shape of topics"
print("Successfully loaded topics from %s" %topicpath)
self._Elogbeta = dirichlet_expectation(self._lambda)
self._expElogbeta = n.exp(self._Elogbeta)
t1 = time.time()
print(("Time to initialize topic model is %.2f" %(t1-t0)))
return
def update_lambda(self, wordids, wordcts):
"""
First does an E step on the mini-batch given in wordids and
wordcts, then uses the result of that E step to update the
variational parameter matrix lambda.
Arguments:
        wordids: List of lists; wordids[d] gives the vocabulary indices of the
            unique words in document d of the mini-batch. (Word order is unimportant.)
        wordcts: List of lists; wordcts[d] gives the corresponding word counts.
        Returns the variational parameters for the per-document topic proportions.
"""
# rhot will be between 0 and 1, and says how much to weight
# the information we got from this mini-batch.
rhot = pow(self._tau0 + self._updatect, -self._kappa)
self._rhot = rhot
# Do an E step to update gamma, phi | lambda for this
# mini-batch. This also returns the information about phi that
# we need to update lambda.
varparams, sstats = self.do_e_step(wordids, wordcts)
        # Update lambda based on documents.
self._lambda = self._lambda * (1-rhot) + \
rhot * (self._eta + self._D * sstats / len(wordids))
self._Elogbeta = dirichlet_expectation(self._lambda)
self._expElogbeta = n.exp(self._Elogbeta)
self._updatect += 1
return varparams
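    # Update rule used above (stochastic natural-gradient step on lambda), written
    # out for reference: with step size rhot = (self._tau0 + t)^(-kappa) and
    # mini-batch size S,
    #   lambda <- (1 - rhot) * lambda + rhot * (eta + (D / S) * sstats),
    # i.e. the mini-batch sufficient statistics are rescaled as if the whole corpus
    # of D documents looked like this mini-batch.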
def log_likelihood_one(self, wordobs_ids, wordobs_cts, wordho_ids, \
wordho_cts):
"""
Inputs:
        wordobs_ids: list, vocabulary indices of the unique observed words
        wordobs_cts: list, number of occurrences of each unique observed word
        wordho_ids: list, vocabulary indices of the unique held-out words
        wordho_cts: list, number of occurrences of each unique held-out word
Outputs:
average log-likelihood of held-out words for the given document
"""
# theta_means should be 1 x self._K
theta_means = self.theta_means(wordobs_ids, wordobs_cts)
# lambda_sums should be self._K x 1
lambda_sums = n.sum(self._lambda, axis=1)
        # lambda_means should be self._K x self._W, with rows summing to 1
lambda_means = self._lambda/lambda_sums[:, n.newaxis]
Mho = list(range(0,len(wordho_ids)))
proba = [wordho_cts[i]*n.log(n.dot(theta_means,lambda_means[:,wordho_ids[i]])) \
for i in Mho]
# average across all held-out words
tot = sum(wordho_cts)
return sum(proba)/tot
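    # The per-word predictive probability used above is the plug-in estimate
    #   p(w_ho | w_obs) ~= sum_k E_q[theta_k] * E_q[beta_kw],
    # with E_q[beta_kw] = lambda_kw / sum_v lambda_kv and E_q[theta_k] given by
    # theta_means(); the returned value is the count-weighted average of the log of
    # this quantity over the held-out tokens.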
def log_likelihood_docs(self, wordids, wordcts):
"""
Inputs:
        wordids: list of lists, vocabulary indices of the unique words in each document
        wordcts: list of lists, counts of those words
        Outputs:
        mean held-out log-likelihood per word, averaged across the documents
        (documents with fewer than two unique words are skipped)
        """
t0 = time.time()
M = len(wordids)
log_likelihoods = []
for i in range(M):
docids = wordids[i] # list
doccts = wordcts[i] # list
# only evaluate log-likelihood if non-trivial document
if len(docids) > 1:
wordobs_ids, wordobs_cts, wordho_ids, wordho_cts = \
split_document(docids, doccts)
doc_likelihood = \
self.log_likelihood_one(wordobs_ids, wordobs_cts, wordho_ids, wordho_cts)
log_likelihoods.append(doc_likelihood)
t1 = time.time()
# print("Time taken to evaluate log-likelihood %.2f" %(t1-t0))
return n.mean(log_likelihoods)
class LDA(_TopicModel):
"""
Inherit _TopicModel to train LDA 1/K.
"""
def __init__(self, vocab, K, topicpath, D, alpha0, eta, tau0, kappa):
"""
Arguments:
K: Number of topics
vocab: A set of words to recognize. When analyzing documents, any word
not in this set will be ignored.
        topicpath: Path to pre-trained topics' Dirichlet parameters, or None.
        D: Total number of documents in the population.
        alpha0: alpha = alpha0/K is the symmetric Dirichlet hyperparameter of the
            prior on topic proportions theta_d.
eta: Hyperparameter for prior on topics beta
tau0: A (positive) learning parameter that downweights early iterations
kappa: Learning rate: exponential decay rate---should be between
(0.5, 1.0] to guarantee asymptotic convergence.
Note that if you pass the same set of D documents in every time and
set kappa=0 this class can also be used to do batch VB.
"""
self._alpha = alpha0/K
_TopicModel.__init__(self, vocab, K, topicpath, D, alpha0, eta, tau0, kappa)
return
def do_e_step(self, wordids, wordcts):
batchD = len(wordids)
# Initialize the variational distribution q(theta|gamma) for
# the mini-batch
# each gamma[:,:] has mean 1 and variance 0.01
gamma = 1*n.random.gamma(100., 1./100., (batchD, self._K))
Elogtheta = dirichlet_expectation(gamma)
expElogtheta = n.exp(Elogtheta)
sstats = n.zeros(self._lambda.shape)
# Now, for each document d update that document's gamma and phi
it = 0
meanchange = 0
converged = False
for d in range(0, batchD):
# These are mostly just shorthand (but might help cache locality)
ids = wordids[d]
cts = wordcts[d]
gammad = gamma[d, :]
Elogthetad = Elogtheta[d, :]
expElogthetad = expElogtheta[d, :]
expElogbetad = self._expElogbeta[:, ids]
# The optimal phi_{dwk} is proportional to
# expElogthetad_k * expElogbetad_w. phinorm is the normalizer.
phinorm = n.dot(expElogthetad, expElogbetad) + 1e-100
# Iterate between gamma and phi until convergence
for it in range(0, 200):
lastgamma = gammad
# We represent phi implicitly to save memory and time.
# Substituting the value of the optimal phi back into
# the update for gamma gives this update. Cf. Lee&Seung 2001.
gammad = self._alpha + expElogthetad * \
n.dot(cts / phinorm, expElogbetad.T)
Elogthetad = dirichlet_expectation(gammad)
expElogthetad = n.exp(Elogthetad)
phinorm = n.dot(expElogthetad, expElogbetad) + 1e-100
# If gamma hasn't changed much, we're done.
meanchange = n.mean(abs(gammad - lastgamma))
if (meanchange < meanchangethresh):
converged = True
break
# might have exited coordinate ascent without convergence
"""
if (not converged):
print("Coordinate ascent in E-step didn't converge")
print("Last change in gammad %f" %meanchange)
"""
gamma[d, :] = gammad
# Contribution of document d to the expected sufficient
# statistics for the M step.
sstats[:, ids] += n.outer(expElogthetad.T, cts/phinorm)
# This step finishes computing the sufficient statistics for the
# M step, so that
# sstats[k, w] = \sum_d n_{dw} * phi_{dwk}
# = \sum_d n_{dw} * exp{Elogtheta_{dk} + Elogbeta_{kw}} / phinorm_{dw}.
sstats = sstats * self._expElogbeta
return (gamma,sstats)
def theta_means(self, wordobs_ids, wordobs_cts):
"""
Inputs:
wordobs_ids = list
wordobs_cts = list
Outputs:
        E_q[theta_k] for each topic k, where q(theta) is the variational
        approximation to the new document's topic proportions.
"""
# do E-step for the document represented by the observed words
# gamma should be 1 x self._K
gamma, _ = self.do_e_step([wordobs_ids],[wordobs_cts])
        # q(theta|gamma) is Dirichlet, so the marginal means are the normalized Dirichlet parameters
theta = gamma/n.sum(gamma)
theta = theta.flatten(order='C')
return theta
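# Example usage (sketch): mini-batch SVI training of LDA 1/K with held-out
# evaluation. The corpus root "wiki10k", the batch size, and the number of
# iterations below are illustrative assumptions, not fixed choices of this file;
# get_batch_from_disk is the batch loader imported from corpus.py.
#
#   with open("wiki10k_wordids.csv") as f:
#       D = sum(1 for line in f)
#   vocab = open("./dictnostops.txt").readlines()
#   lda = LDA(vocab, 100, None, D, alpha0=1., eta=0.01, tau0=1024., kappa=0.7)
#   for t in range(100):
#       wordids, wordcts = get_batch_from_disk("wiki10k", D, 64)
#       lda.update_lambda(wordids, wordcts)
#       if t % 10 == 0:
#           print(lda.log_likelihood_docs(wordids, wordcts))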
class SB_LDA(_TopicModel):
"""
Inherit _TopicModel to train SB-LDA at level K.
"""
def __init__(self, vocab, K, topicpath, D, alpha0, eta, tau0, kappa):
"""
Arguments:
K: Number of topics
vocab: A set of words to recognize. When analyzing documents, any word
not in this set will be ignored.
        topicpath: Path to pre-trained topics' Dirichlet parameters, or None.
D: Total number of documents in the population.
alpha0: Hyperparameter of the stick-breaking weights Beta(1,alpha0).
eta: Hyperparameter for prior on topics beta
tau0: A (positive) learning parameter that downweights early iterations
kappa: Learning rate: exponential decay rate---should be between
(0.5, 1.0] to guarantee asymptotic convergence.
"""
self._alpha0 = alpha0
_TopicModel.__init__(self, vocab, K, topicpath, D, alpha0, eta, tau0, kappa)
# for updating tau given phi
mask = n.zeros((self._K, self._K))
for i in range(self._K):
for j in range(self._K):
mask[i,j] = int(j > i)
self._fmask = mask # size (self._K, self._K)
# for updating phi given tau
self._bmask = mask.transpose()
"""
print("Mask for updating tau given phi")
print(self._fmask)
print("Mask for updating phi given tau")
print(self._bmask)
"""
return
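    # For example, with self._K = 3 the masks built above are
    #   fmask = [[0,1,1],[0,0,1],[0,0,0]]   (fmask[i,j] = 1 iff j > i)
    #   bmask = [[0,0,0],[1,0,0],[1,1,0]]   (bmask[i,j] = 1 iff j < i)
    # so n.dot(fmask, x)[k] = sum_{j>k} x[j] and n.dot(bmask, x)[k] = sum_{j<k} x[j];
    # the latter implements E[log theta_k] = E[log pi_k] + sum_{j<k} E[log(1 - pi_j)]
    # under the stick-breaking construction theta_k = pi_k * prod_{j<k} (1 - pi_j).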
def ELBO(self, ids, cts, phi, tau1, tau2):
"""
Compute ELBO of sampled document as function of local variational parameters.
Inputs:
ids = list, index of unique words
cts = list, count of unique words
phi = (self._K x len(cts)), variational parameters over per-word topic assignments
        tau1, tau2 = lists, variational parameters over the per-document
            stick-breaking weights
        Outputs:
        ELBO(phi, tau1, tau2) for the sampled document.
        """
# KL terms
KLs = beta_KL(tau1[:(self._K-1)],tau2[:(self._K-1)],1,self._alpha0)
term1 = -n.sum(KLs)
# E q [log p (w | z, beta)]
Elogbetad = self._Elogbeta[:, ids] # (self._K, len(ids))
term2 = n.sum(n.multiply(cts, n.sum(n.multiply(phi, Elogbetad),axis=0)))
# E q [log p (z | p)]
Elogpm1pd = dirichlet_expectation(n.column_stack((tau1,tau2)))
Elogpd = Elogpm1pd[:,0] # shape (self._K,).
Elogm1pd = Elogpm1pd[:,1] # shape (self._K,). Last value is -Inf since Beta(1,0), need to fix.
Elogm1pd[self._K-1] = 0
Elogthetad = Elogpd + n.dot(self._bmask, Elogm1pd) # shape (self._K,)
term3 = n.sum(n.multiply(cts, n.dot(Elogthetad[n.newaxis,:], phi)))
# E q [log q(z)]
        term4 = -n.sum(n.multiply(cts, multinomial_entropy(phi)))
bound = term1 + term2 + term3 + term4
return bound
def ELBOglobal(self, ids, cts, phi, tau1, tau2):
"""
Compute ELBO as function of all variational parameters, pretending that the
document (ids, cts) occurred self._D times.
"""
nKL = -n.sum(dirichlet_KL(self._lambda,self._eta*n.ones((self._K, self._W))))
local = self._D*self.ELBO(ids, cts, phi, tau1, tau2)
return nKL + local
def init_phi(self, ids):
"""
Inputs:
        ids: list, vocabulary indices of the document's unique words
        Outputs:
        phi initialized as if all entries of Elogthetad were equal, i.e.,
        considering only the effect of the topics, not the topic proportions.
"""
Elogbetad = self._Elogbeta[:, ids] # (self._K, len(ids))
logphi = Elogbetad # size (self._K, len(ids))
# normalize across rows
unormphi = n.exp(logphi)
phinorm = n.sum(unormphi, axis=0)+1e-100 # size should be 1 x len(cts)
phi = unormphi/phinorm[n.newaxis, :]
return phi
def init_tau(self, batch_size):
"""
Inputs:
batch_size = number of documents being processed
Outputs:
        tau1, tau2 initialized to be relatively uninformative about theta
        Remarks:
        other initialization schemes for tau could be tried here.
"""
tau1 = n.random.gamma(100., 1./100., (batch_size, self._K))
tau1[:,self._K-1] = 1 # corner case
tau2 = n.random.gamma(100*self._alpha0, 1./100., (batch_size, self._K))
tau2[:,self._K-1] = 0 # corner case
return tau1, tau2
def opt_phi(self, tau1d, tau2d, ids):
"""
Inputs:
        tau1d: (self._K,) array, first Beta parameters of q(pi) for this document
        tau2d: (self._K,) array, second Beta parameters of q(pi) for this document
        ids: list, vocabulary indices of the document's unique words
        Outputs:
        phi, the (self._K, len(ids)) matrix of optimal per-word topic
        responsibilities given tau1d and tau2d.
        """
Elogbetad = self._Elogbeta[:, ids] # (self._K, len(ids))
Elogpm1pd = dirichlet_expectation(n.column_stack((tau1d,tau2d)))
Elogpd = Elogpm1pd[:,0] # shape (self._K,).
Elogm1pd = Elogpm1pd[:,1] # shape (self._K,). Last value is -Inf since Beta(1,0), need to fix.
Elogm1pd[self._K-1] = 0
""" print("Elogm1pd") print(Elogm1pd) """
# for now, explicitly represent optimal phi to ensure correctness
Elogthetad = Elogpd + n.dot(self._bmask, Elogm1pd) # shape (self._K,)
""" print("Elogthetad") print(Elogthetad) """
logphi = Elogthetad[:, n.newaxis] + Elogbetad # size (self._K, len(cts))
# normalize across rows
unormphi = n.exp(logphi)
phinorm = n.sum(unormphi, axis=0)+1e-100 # size should be 1 x len(cts)
phi = unormphi/phinorm[n.newaxis, :]
return phi
def opt_tau(self, phi, cts):
"""
Inputs:
        phi: (self._K, len(cts)) matrix of per-word topic responsibilities
        cts: list, counts of the document's unique words
        Outputs:
        (tau1d, tau2d), the optimal Beta parameters of q(pi) given phi.
        """
tau1d = 1 + n.dot(phi, cts) # careful, dot of 2-D array with list!
tau2d = self._alpha0 + n.dot(n.dot(self._fmask, phi), cts) # careful, dot of 2-D array with list
tau1d[self._K - 1] = 1
tau2d[self._K - 1] = 0
return (tau1d, tau2d)
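    # Closed-form coordinate updates computed above: with n_w the count of word w,
    #   tau1d[k] = 1      + sum_w n_w * phi[k, w]
    #   tau2d[k] = alpha0 + sum_w n_w * sum_{j > k} phi[j, w]
    # and the last stick is pinned to (1, 0), effectively fixing pi_K = 1 so that
    # the remaining mass goes to the final topic.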
def do_e_step(self, wordids, wordcts):
batchD = len(wordids)
# Initialize the variational distribution q(pi|tau) for
# the mini-batch
tau1, tau2 = self.init_tau(batchD) # each has size batchD x self._K
sstats = n.zeros(self._lambda.shape) # shape (self._K, self._W)
        # Now, for each document d update that document's tau and phi
it = 0
meanchange = 0
for d in range(0, batchD):
# These are mostly just shorthand (but might help cache locality)
ids = wordids[d] # list
cts = wordcts[d] # list
tau1d, tau2d = tau1[d, :], tau2[d, :] # each has size (self._K,)
phi = self.opt_phi(tau1d, tau2d, ids)
# Iterate between tau and phi until convergence
converged = False
for it in range(0, 200):
lasttau1 = tau1d
lasttau2 = tau2d
tau1d, tau2d = self.opt_tau(phi, cts)
phi = self.opt_phi(tau1d, tau2d, ids)
# If tau hasn't changed much, we're done.
meanchange = 0.5*n.mean(abs(lasttau1 - tau1d)) + 0.5*n.mean(abs(lasttau2 - tau2d))
if (meanchange < meanchangethresh):
converged = True
break
tau1[d, :] = tau1d
tau2[d, :] = tau2d
sstats[:, ids] += n.multiply(phi,cts)
return ((tau1, tau2), sstats)
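    # Unlike LDA.do_e_step, phi is represented explicitly here, so the sufficient
    # statistics are accumulated directly as sstats[k, w] += n_w * phi[k, w].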
def debug_e_step(self, ids, cts, init_type="tau"):
"""
Almost the same functionality as do_e_step, but with additions
to aid debugging: record ELBO across coordinate ascent steps
Inputs:
ids = list, index of unique words
cts = list, count of unique words
        init_type = str, either "tau" or "phi"; which local parameter to initialize first
        Outputs:
        ((phi, tau1d, tau2d), lbound, globELBO, changes, sstats)
        """
# Initialize the variational distribution q(pi|tau) for
# the mini-batch
sstats = n.zeros(self._lambda.shape) # shape (self._K, self._W)
# record ELBO as function of local variational parameters
lbound = []
# record change in variational parameters
changes = []
if (init_type == "tau"):
tau1, tau2 = self.init_tau(1) # each has size 1 x self._K
tau1d, tau2d = tau1[0, :], tau2[0, :] # each has size (self._K,)
phi = self.opt_phi(tau1d, tau2d, ids) # self._K x len(ids)
elif (init_type == "phi"):
phi = self.init_phi(ids) # self._K x len(ids)
tau1d, tau2d = self.opt_tau(phi, cts) # each has size (self._K,)
"""
print("Initial topic proportion after %s init is " %init_type)
print(GEM_expectation(tau1d[n.newaxis,:], tau2d[n.newaxis,:], self._K))
"""
lbound.append(self.ELBO(ids, cts, phi, tau1d, tau2d))
        # Update this document's tau and phi
it = 0
meanchange = 0
# Iterate between tau and phi until convergence
converged = False
for it in range(0, 200):
lasttau1 = tau1d
lasttau2 = tau2d
if (init_type == "tau"):
tau1d, tau2d = self.opt_tau(phi, cts)
phi = self.opt_phi(tau1d, tau2d, ids)
elif (init_type == "phi"):
phi = self.opt_phi(tau1d, tau2d, ids)
tau1d, tau2d = self.opt_tau(phi, cts)
# If tau hasn't changed much, we're done.
lbound.append(self.ELBO(ids, cts, phi, tau1d, tau2d))
meanchange = 0.5*n.mean(abs(lasttau1 - tau1d)) + 0.5*n.mean(abs(lasttau2 - tau2d))
changes.append(meanchange)
if (meanchange < meanchangethresh):
converged = True
break
sstats[:, ids] += n.multiply(phi,cts)
"""
print("Topic proportion at convergence is")
print(GEM_expectation(tau1d[n.newaxis,:], tau2d[n.newaxis,:], self._K))
"""
## ELBO as function of global params (for optimized local params)
globELBO = self.ELBOglobal(ids, cts, phi, tau1d, tau2d)
## invariant: sum of all sstats entries equal sum of cts
assert abs(n.sum(sstats) - n.sum(cts) < 0.001), "sstats invariant doesn't hold!"
return ((phi, tau1d, tau2d), lbound, globELBO, changes, sstats)
def debug_update_lambda(self, ids, cts):
"""
Inputs:
        ids, cts: lists of unique word indices and their counts for a single document
        Outputs:
        the optimized local variational parameters (phi, tau1d, tau2d); the change in
        the global ELBO caused by the lambda update is printed as a side effect.
        """
# rhot will be between 0 and 1, and says how much to weight
# the information we got from this mini-batch.
rhot = pow(self._tau0 + self._updatect, -self._kappa)
self._rhot = rhot
        # Do an E step to update tau, phi | lambda for this document. This also
        # returns the information about phi that we need to update lambda.
varparams, lbound, oldELBO, changes, sstats = self.debug_e_step(ids, cts)
# Update lambda based on documents.
self._lambda = self._lambda * (1-rhot) + \
rhot * (self._eta + self._D * sstats)
self._Elogbeta = dirichlet_expectation(self._lambda)
self._expElogbeta = n.exp(self._Elogbeta)
newELBO = self.ELBOglobal(ids, cts, varparams[0], varparams[1], varparams[2])
self._updatect += 1
print("Change in ELBO caused by %d-th update is %.2f" %(self._updatect, newELBO-oldELBO))
return varparams
def theta_means(self, wordobs_ids, wordobs_cts):
"""
Inputs:
wordobs_ids = list
wordobs_cts = list
Outputs:
        E_q[theta_k] for each topic k, where q(theta) is the variational
        approximation to the new document's topic proportions.
        """
taus, _ = self.do_e_step([wordobs_ids],[wordobs_cts])
theta = GEM_expectation(taus[0], taus[1], self._K) # each's shape is 1 x self._K
return theta
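    # GEM_expectation (imported from utils) is expected to return the stick-breaking
    # means E_q[theta_k] = E[pi_k] * prod_{j < k} E[1 - pi_j]
    #   = (tau1_k / (tau1_k + tau2_k)) * prod_{j < k} (tau2_j / (tau1_j + tau2_j)),
    # the analogue of gamma / sum(gamma) used by LDA.theta_means above.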
def sanity_E_step(seed, K, topicpath):
"""
    Examine the effect of the SB-LDA E-step on a few documents.
    Inputs:
    seed: random seed, for reproducibility
    K: number of topics
    topicpath: file path of pre-trained topics for warm-start training, or None
    Outputs:
    None; plots the ELBO and the mean change in tau across coordinate-ascent
    iterations for six sampled documents.
    """
## load topics
inroot = "wiki10k"
infile = inroot + "_wordids.csv"
with open(infile) as f:
D = sum(1 for line in f)
vocab = open('./dictnostops.txt').readlines()
tm = SB_LDA(vocab, K, topicpath, D, 1, 0.01, 1024., 0.7)
    ## E-step on six documents, plotting the ELBO and the change in tau across
    ## coordinate-ascent iterations, but making no updates to the underlying topics
n.random.seed(seed)
(wordids, wordcts) = \
get_batch_from_disk(inroot, D, 6)
fig1, axes1 = plt.subplots(2,3,figsize=(12,8))
fig2, axes2 = plt.subplots(2,3,figsize=(12,8))
for i in range(6):
cidx = i % 3
ridx = i // 3
_, elbo1, oldbound1, changes1, sstats1 = tm.debug_e_step(wordids[i], wordcts[i], "tau")
_, elbo2, oldbound2, changes2, sstats2 = tm.debug_e_step(wordids[i], wordcts[i], "phi")
# ELBO
axes1[ridx, cidx].plot(elbo1, marker='o', markersize=2, label="tau")
axes1[ridx, cidx].plot(elbo2, marker='o', markersize=2, label="phi")
axes1[ridx, cidx].set_xlabel("Iteration")
axes1[ridx, cidx].set_ylabel("ELBO")
axes1[ridx, cidx].set_title("Document %i contains %d words" %(i, sum(wordcts[i])))
axes1[ridx, cidx].legend()
# change in tau
axes2[ridx, cidx].plot(changes1, marker='o', markersize=2, label="tau")
axes2[ridx, cidx].plot(changes2, marker='o', markersize=2, label="phi")
axes2[ridx, cidx].set_xlabel("Iteration")
axes2[ridx, cidx].set_ylabel("Mean change in tau")
axes2[ridx, cidx].set_title("Document %i contains %d words" %(i, sum(wordcts[i])))
axes2[ridx, cidx].legend()
fig1.tight_layout()
fig2.tight_layout()
plt.show()
return
def sanity_M_step(seed, K, topicpath):
## load topics
inroot = "wiki10k"
infile = inroot + "_wordids.csv"
with open(infile) as f:
D = sum(1 for line in f)
vocab = open('./dictnostops.txt').readlines()
tm = SB_LDA(vocab, K, topicpath, D, 1, 0.01, 1024., 0.7)
## Do 10 M-steps based on 10 sampled documents. All steps
## print out the change in ELBO as function of topics after
## the update: should always be positive!
n.random.seed(seed)
(wordids, wordcts) = \
get_batch_from_disk(inroot, D, 10)
for i in range(10):
_ = tm.debug_update_lambda(wordids[i], wordcts[i])
return
def main():
sanity_E_step(12, 100, "results/sbldaK100_D16_wiki10k_wiki1k/lambda-100.dat")
sanity_M_step(1, 100, None)
return
if __name__ == '__main__':
main()