-
Notifications
You must be signed in to change notification settings - Fork 10
/
Copy pathpinyin_dict.py
36 lines (26 loc) · 954 Bytes
/
pinyin_dict.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import os
import re
import collections
import pypinyin
from pypinyin.pinyin_dict import pinyin_dict
# Characters stripped from a bopomofo string to obtain its toneless variant.
re_bpmf_tones = re.compile('[ˉˊˇˋ˙]')
# Readings containing any of these n/m/ê forms are skipped entirely.
# NOTE(review): presumably because they are not regular syllables -- confirm.
re_special_py = re.compile("(ń|ň|ǹ|m̄|ḿ|m̀|ê̄|ế|ê̌|ề)")

# Distinct spellings gathered across every entry of pypinyin's dictionary.
all_py = set()
all_bpmf = set()

# Each dictionary value is a comma-separated string of readings; the key is
# not needed here, so only the values are walked.
for readings in pinyin_dict.values():
    for reading in readings.split(','):
        if re_special_py.search(reading):
            continue

        # Record the reading both in NORMAL style and in TONE style.
        all_py.update(
            pypinyin.style.convert(reading, style, strict=True)
            for style in (pypinyin.NORMAL, pypinyin.TONE)
        )

        # Record the bopomofo form as converted and with tone marks removed.
        zhuyin = pypinyin.style.convert(reading, pypinyin.BOPOMOFO, strict=True)
        all_bpmf.update((zhuyin, re_bpmf_tones.sub('', zhuyin)))

# Write each collection sorted, one entry per line, with a trailing newline.
for out_name, entries in (('pinyin.txt', all_py), ('bopomofo.txt', all_bpmf)):
    with open(out_name, 'w', encoding='utf-8') as out:
        out.write('\n'.join(sorted(entries)) + '\n')