Skip to content

Commit 233808c

Browse files
committed
increase default N_words in _tf_idf
1 parent c7fd223 commit 233808c

File tree

1 file changed

+3
-4
lines changed

1 file changed

+3
-4
lines changed

stream_topic/preprocessor/_tf_idf.py

Lines changed: 3 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -21,7 +21,7 @@ def c_tf_idf(documents, m, ngram_range=(1, 1)):
2121
w = t.sum(axis=1)
2222

2323
# Suppress divide by zero warning
24-
with np.errstate(divide='ignore', invalid='ignore'):
24+
with np.errstate(divide="ignore", invalid="ignore"):
2525
tf = np.divide(t.T, w)
2626
if np.any(np.isnan(tf)) or np.any(np.isinf(tf)):
2727
logger.warning("NaNs or inf in tf matrix")
@@ -34,7 +34,7 @@ def c_tf_idf(documents, m, ngram_range=(1, 1)):
3434
return tf_idf, count
3535

3636

37-
def extract_tfidf_topics(tf_idf, count, docs_per_topic, n=10):
37+
def extract_tfidf_topics(tf_idf, count, docs_per_topic, n=100):
3838
"""class based tf_idf retrieval from cluster of documents
3939
4040
Args:
@@ -51,8 +51,7 @@ def extract_tfidf_topics(tf_idf, count, docs_per_topic, n=10):
5151
tf_idf_transposed = tf_idf.T
5252
indices = tf_idf_transposed.argsort()[:, -n:]
5353
top_n_words = {
54-
label: [((words[j]), (tf_idf_transposed[i][j]))
55-
for j in indices[i]][::-1]
54+
label: [((words[j]), (tf_idf_transposed[i][j])) for j in indices[i]][::-1]
5655
for i, label in enumerate(labels)
5756
}
5857

0 commit comments

Comments
 (0)