@@ -21,7 +21,7 @@ def c_tf_idf(documents, m, ngram_range=(1, 1)):
2121 w = t .sum (axis = 1 )
2222
2323 # Suppress divide by zero warning
24- with np .errstate (divide = ' ignore' , invalid = ' ignore' ):
24+ with np .errstate (divide = " ignore" , invalid = " ignore" ):
2525 tf = np .divide (t .T , w )
2626 if np .any (np .isnan (tf )) or np .any (np .isinf (tf )):
2727 logger .warning ("NaNs or inf in tf matrix" )
@@ -34,7 +34,7 @@ def c_tf_idf(documents, m, ngram_range=(1, 1)):
3434 return tf_idf , count
3535
3636
37- def extract_tfidf_topics (tf_idf , count , docs_per_topic , n = 10 ):
37+ def extract_tfidf_topics (tf_idf , count , docs_per_topic , n = 100 ):
3838 """class based tf_idf retrieval from cluster of documents
3939
4040 Args:
@@ -51,8 +51,7 @@ def extract_tfidf_topics(tf_idf, count, docs_per_topic, n=10):
5151 tf_idf_transposed = tf_idf .T
5252 indices = tf_idf_transposed .argsort ()[:, - n :]
5353 top_n_words = {
54- label : [((words [j ]), (tf_idf_transposed [i ][j ]))
55- for j in indices [i ]][::- 1 ]
54+ label : [((words [j ]), (tf_idf_transposed [i ][j ])) for j in indices [i ]][::- 1 ]
5655 for i , label in enumerate (labels )
5756 }
5857
0 commit comments