Skip to content

Commit c420a2d

Browse files
Merge pull request #43 from x-tabdeveloping/chinese
Added option for specifying custom font for word clouds
2 parents 66861e7 + 6e40a4a commit c420a2d

File tree

14 files changed

+109
-20
lines changed

14 files changed

+109
-20
lines changed

README.md

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,14 @@ Pretty and opinionated topic model visualization in Python.
1919

2020
https://github.com/x-tabdeveloping/topicwizard/assets/13087737/9736f33c-6865-4ed4-bc17-d8e6369bda80
2121

22+
## New in version 1.1.3
23+
24+
You can now specify a custom font file to be used when rendering word clouds.
25+
This makes topicwizard usable with Chinese and other languages written in non-Latin scripts.
26+
27+
```python
28+
topicwizard.visualize(topic_data=topic_data, wordcloud_font_path="NotoSansTC-Bold.ttf")
29+
```
2230

2331
## New in version 1.1.0 🌟
2432

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
[tool.poetry]
22
name = "topic-wizard"
3-
version = "1.1.2"
3+
version = "1.1.3"
44
description = "Pretty and opinionated topic model visualization in Python."
55
authors = ["Márton Kardos <[email protected]>"]
66
license = "MIT"

topicwizard/app.py

Lines changed: 16 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@ def get_dash_app(
3333
exclude_pages: Optional[Set[PageName]] = None,
3434
document_names: Optional[List[str]] = None,
3535
group_labels: Optional[List[str]] = None,
36+
wordcloud_font_path: Optional[str] = None,
3637
) -> Dash:
3738
"""Returns topicwizard Dash application.
3839
@@ -50,6 +51,9 @@ def get_dash_app(
5051
You can pass it along if you have genre labels for example.
5152
In this case an additional page will get created with information
5253
about how these groups relate to topics and words in the corpus.
54+
wordcloud_font_path: str, default None
55+
Font to use for generating wordclouds.
56+
Open Sans is used by default.
5357
5458
Returns
5559
-------
@@ -64,6 +68,7 @@ def get_dash_app(
6468
or [f"Document {i}" for i, _ in enumerate(topic_data["corpus"])],
6569
group_labels=group_labels,
6670
exclude_pages=exclude_pages,
71+
wordcloud_font_path=wordcloud_font_path,
6772
)
6873
app = Dash(
6974
__name__,
@@ -82,7 +87,9 @@ def get_dash_app(
8287
return app
8388

8489

85-
def load_app(filename: str, exclude_pages: Optional[Iterable[PageName]] = None) -> Dash:
90+
def load_app(
91+
filename: str, exclude_pages: Optional[Iterable[PageName]] = None, **kwargs
92+
) -> Dash:
8693
"""Loads and prepares saved app from disk.
8794
8895
Parameters
@@ -100,7 +107,7 @@ def load_app(filename: str, exclude_pages: Optional[Iterable[PageName]] = None)
100107
exclude_pages = set()
101108
else:
102109
exclude_pages = set(exclude_pages)
103-
return get_dash_app(**data, exclude_pages=exclude_pages)
110+
return get_dash_app(**data, exclude_pages=exclude_pages, **kwargs)
104111

105112

106113
def open_url(url: str) -> None:
@@ -156,6 +163,7 @@ def load(
156163
filename: str,
157164
exclude_pages: Optional[Iterable[PageName]] = None,
158165
port: int = 8050,
166+
**kwargs,
159167
) -> Optional[threading.Thread]:
160168
"""Visualizes topic model data loaded from disk.
161169
@@ -179,7 +187,7 @@ def load(
179187
"""
180188
print("Preparing data")
181189
exclude_pages = set() if exclude_pages is None else set(exclude_pages)
182-
app = load_app(filename, exclude_pages=exclude_pages)
190+
app = load_app(filename, exclude_pages=exclude_pages, **kwargs)
183191
return run_app(app, port=port)
184192

185193

@@ -211,6 +219,7 @@ def visualize(
211219
exclude_pages: Optional[Iterable[PageName]] = None,
212220
group_labels: Optional[List[str]] = None,
213221
port: int = 8050,
222+
wordcloud_font_path: Optional[str] = None,
214223
**kwargs,
215224
) -> Optional[threading.Thread]:
216225
"""Visualizes your topic model with topicwizard.
@@ -238,7 +247,9 @@ def visualize(
238247
You can pass it along if you have genre labels for example.
239248
In this case an additional page will get created with information
240249
about how these groups relate to topics and words in the corpus.
241-
250+
wordcloud_font_path: str, default None
251+
Font to use for generating wordclouds.
252+
Open Sans is used by default.
242253
243254
Returns
244255
-------
@@ -278,5 +289,6 @@ def visualize(
278289
document_names=document_names,
279290
exclude_pages=exclude_pages,
280291
group_labels=group_labels,
292+
wordcloud_font_path=wordcloud_font_path,
281293
)
282294
return run_app(app, port=port)

topicwizard/blueprints/app.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@ def create_blueprint(
3131
word_positions: Optional[np.ndarray] = None,
3232
topic_positions: Optional[np.ndarray] = None,
3333
document_positions: Optional[np.ndarray] = None,
34+
wordcloud_font_path: Optional[str] = None,
3435
) -> DashBlueprint:
3536
# --------[ Collecting blueprints ]--------
3637
topic_blueprint = (
@@ -43,6 +44,7 @@ def create_blueprint(
4344
corpus=corpus,
4445
topic_names=topic_names,
4546
topic_positions=topic_positions,
47+
wordcloud_font_path=wordcloud_font_path,
4648
)
4749
if "topics" not in exclude_pages
4850
else create_blank_page("topics")
@@ -59,6 +61,7 @@ def create_blueprint(
5961
corpus=corpus,
6062
topic_names=topic_names,
6163
document_positions=document_positions,
64+
wordcloud_font_path=wordcloud_font_path,
6265
)
6366
if "documents" not in exclude_pages
6467
else create_blank_page("documents")
@@ -87,6 +90,7 @@ def create_blueprint(
8790
corpus=corpus,
8891
topic_names=topic_names,
8992
group_labels=group_labels,
93+
wordcloud_font_path=wordcloud_font_path,
9094
)
9195
if group_labels is not None
9296
else create_blank_page("groups")

topicwizard/blueprints/groups.py

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
from typing import Any, List
1+
from typing import Any, List, Optional
22

33
import dash_mantine_components as dmc
44
import numpy as np
@@ -10,7 +10,8 @@
1010
import topicwizard.prepare.groups as prepare
1111
from topicwizard.components.groups.group_barplot import create_group_barplot
1212
from topicwizard.components.groups.group_map import create_group_map
13-
from topicwizard.components.groups.group_wordcloud import create_group_wordcloud
13+
from topicwizard.components.groups.group_wordcloud import \
14+
create_group_wordcloud
1415
from topicwizard.help.utils import make_helper
1516

1617

@@ -20,6 +21,7 @@ def create_blueprint(
2021
document_topic_matrix: np.ndarray,
2122
topic_term_matrix: np.ndarray,
2223
group_labels: List[str],
24+
wordcloud_font_path: Optional[str] = None,
2325
**kwargs,
2426
) -> DashBlueprint:
2527
# --------[ Preparing data ]--------
@@ -47,7 +49,9 @@ def create_blueprint(
4749
group_map = create_group_map(
4850
group_positions, group_importances, group_names, dominant_topics, topic_colors
4951
)
50-
group_wordcloud = create_group_wordcloud(group_term_importances, vocab)
52+
group_wordcloud = create_group_wordcloud(
53+
group_term_importances, vocab, wordcloud_font_path=wordcloud_font_path
54+
)
5155
group_barchart = create_group_barplot(group_topic_importances, topic_colors)
5256
blueprints = [
5357
group_map,

topicwizard/blueprints/topics.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,7 @@ def create_blueprint(
5050
topic_term_matrix: np.ndarray,
5151
topic_names: List[str],
5252
topic_positions: Optional[np.ndarray] = None,
53+
wordcloud_font_path: Optional[str] = None,
5354
**kwargs,
5455
) -> DashBlueprint:
5556
# --------[ Preparing data ]--------
@@ -61,7 +62,9 @@ def create_blueprint(
6162
topic_positions, topic_importances, topic_names
6263
)
6364
topic_barplot = create_topic_barplot(topic_term_matrix, vocab)
64-
wordcloud = create_wordcloud(topic_term_matrix, vocab)
65+
wordcloud = create_wordcloud(
66+
topic_term_matrix, vocab, wordcloud_font_path=wordcloud_font_path
67+
)
6568
blueprints = [
6669
intertopic_map,
6770
topic_switcher,

topicwizard/components/groups/group_wordcloud.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99

1010

1111
def create_group_wordcloud(
12-
group_term_importances: np.ndarray, vocab: np.ndarray
12+
group_term_importances: np.ndarray, vocab: np.ndarray, wordcloud_font_path=None
1313
) -> DashBlueprint:
1414
group_wordcloud = DashBlueprint()
1515

@@ -25,6 +25,6 @@ def create_group_wordcloud(
2525
)
2626
def update_plot(selected_group: int) -> go.Figure:
2727
top_words = prepare.top_words(selected_group, 60, group_term_importances, vocab)
28-
return plots.wordcloud(top_words)
28+
return plots.wordcloud(top_words, custom_font_path=wordcloud_font_path)
2929

3030
return group_wordcloud

topicwizard/components/topics/wordcloud.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
import topicwizard.prepare.topics as prepare
88

99

10-
def create_wordcloud(topic_term_matrix, vocab):
10+
def create_wordcloud(topic_term_matrix, vocab, wordcloud_font_path=None):
1111
wordcloud = DashBlueprint()
1212
top_bar = prepare.calculate_top_words(
1313
topic_id=0,
@@ -32,6 +32,6 @@ def update(current_topic: int) -> go.Figure:
3232
components=topic_term_matrix,
3333
vocab=vocab,
3434
)
35-
return plots.wordcloud(top_bar)
35+
return plots.wordcloud(top_bar, custom_font_path=wordcloud_font_path)
3636

3737
return wordcloud

topicwizard/figures/groups.py

Lines changed: 13 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -163,7 +163,12 @@ def group_topic_barcharts(
163163

164164

165165
def group_wordclouds(
166-
topic_data: TopicData, group_labels: List[str], top_n: int = 30, n_columns: int = 4
166+
topic_data: TopicData,
167+
group_labels: List[str],
168+
top_n: int = 30,
169+
n_columns: int = 4,
170+
custom_font_path: str = None,
171+
color_scheme: str = "twilight",
167172
) -> go.Figure:
168173
"""Plots wordclouds for each group.
169174
@@ -177,6 +182,10 @@ def group_wordclouds(
177182
Number of words to display for each group.
178183
n_columns: int, default 4
179184
Number of columns the faceted plot should have.
185+
custom_font_path: str, default None
186+
Path to custom font to use to render the wordcloud.
187+
color_scheme: str, default 'twilight'
188+
Matplotlib color scheme to use for the plot.
180189
"""
181190
# Factorizing group labels
182191
group_id_labels, group_names = pd.factorize(group_labels)
@@ -203,7 +212,9 @@ def group_wordclouds(
203212
top_words = prepare.top_words(
204213
group_id, top_n, group_term_importances, topic_data["vocab"]
205214
)
206-
subfig = plots.wordcloud(top_words)
215+
subfig = plots.wordcloud(
216+
top_words, color_scheme=color_scheme, custom_font_path=custom_font_path
217+
)
207218
row, column = (group_id // n_columns) + 1, (group_id % n_columns) + 1
208219
fig.add_trace(subfig.data[0], row=row, col=column)
209220
fig.update_layout(

topicwizard/figures/topics.py

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -111,6 +111,7 @@ def topic_wordclouds(
111111
top_n: int = 30,
112112
n_columns: int = 4,
113113
color_scheme: str = "copper",
114+
custom_font_path=None,
114115
) -> go.Figure:
115116
"""Plots most relevant words as word clouds for every topic.
116117
@@ -124,6 +125,8 @@ def topic_wordclouds(
124125
Number of columns in the subplot grid.
125126
color_scheme: str, default 'copper'
126127
Matplotlib color scheme to use for the wordcloud.
128+
custom_font_path: str, default None
129+
Path to custom font to use to render the wordcloud.
127130
"""
128131
n_topics = topic_data["topic_term_matrix"].shape[0]
129132
(
@@ -150,7 +153,9 @@ def topic_wordclouds(
150153
components=topic_term_importances,
151154
vocab=topic_data["vocab"],
152155
)
153-
subfig = plots.wordcloud(top_words, color_scheme=color_scheme)
156+
subfig = plots.wordcloud(
157+
top_words, color_scheme=color_scheme, custom_font_path=custom_font_path
158+
)
154159
row, column = (topic_id // n_columns) + 1, (topic_id % n_columns) + 1
155160
fig.add_trace(subfig.data[0], row=row, col=column)
156161
fig.update_layout(

0 commit comments

Comments
 (0)