-
Notifications
You must be signed in to change notification settings - Fork 290
/
demo_chinese.py
29 lines (22 loc) · 1.16 KB
/
demo_chinese.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
from __future__ import print_function
import pandas as pd
from scattertext import CorpusFromParsedDocuments, produce_scattertext_explorer
from scattertext import chinese_nlp
# Compare Chinese translations of "A Tale of Two Cities" and "Ulysses", from http://www.pku.edu.cn/study/novel/ulysses/cindex.htm
def main():
    """Build the Chinese-language demo explorer and save it as ./demo_chinese.html.

    Reads the demo CSV from its remote URL, applies ``chinese_nlp`` to every
    entry of the ``text`` column, builds a corpus whose categories come from
    the ``novel`` column, renders the explorer HTML with 'Tale of Two Cities'
    as the category and 'Ulysses' as the not-category label, writes the page
    to disk and prints a hint telling the user where to find it.
    """
    df = pd.read_csv('https://cdn.rawgit.com/JasonKessler/scattertext/e508bf32/scattertext/data/chinese.csv')
    df['text'] = df['text'].apply(chinese_nlp)
    corpus = CorpusFromParsedDocuments(
        df,
        category_col='novel',
        parsed_col='text',
    ).build()
    html = produce_scattertext_explorer(
        corpus,
        category='Tale of Two Cities',
        category_name='Tale of Two Cities',
        not_category_name='Ulysses',
        width_in_pixels=1000,
        metadata=df['novel'],
        asian_mode=True,
    )
    # Write explicit UTF-8 bytes: text-mode open() falls back to the locale's
    # default encoding, which may be unable to represent the Chinese text in
    # the page. The ``with`` block also guarantees the file is flushed and
    # closed before the user is told to open it.
    with open('./demo_chinese.html', 'wb') as out_file:
        out_file.write(html.encode('utf-8'))
    print('Open ./demo_chinese.html in Chrome or Firefox.')
# Run the demo only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()