Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Replace Cantonese g2p with "ToJyutping" #1697

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 19 additions & 7 deletions GPT_SoVITS/text/cantonese.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,8 @@
import sys
import re
import cn2an
import ToJyutping

from pyjyutping import jyutping
from text.symbols import punctuation
from text.zh_normalization.text_normlization import TextNormalizer

Expand Down Expand Up @@ -173,12 +173,24 @@ def jyuping_to_initials_finals_tones(jyuping_syllables):


def get_jyutping(text):
    """Convert text into a flat list of jyutping syllables and punctuation.

    The text is passed to ``ToJyutping.get_jyutping_list`` and the resulting
    ``(word, syllable)`` pairs are walked in order:

    * a word made up solely of characters from ``punctuation`` contributes
      one list entry per punctuation character;
    * any other word contributes its syllable string unchanged (either a
      single syllable such as ``liu4`` or several space-separated ones such
      as ``liu4 ge3``).

    Args:
        text: The string to convert.

    Returns:
        A list of strings: jyutping syllable strings and single punctuation
        characters, in input order.

    Raises:
        ValueError: If a non-punctuation word has no jyutping reading or the
            reading is not of the form ``<letters><tone 1-6>`` (optionally
            repeated, separated by single spaces).
    """
    jyutping_array = []
    punct_pattern = re.compile(r"^[{}]+$".format(re.escape("".join(punctuation))))
    # match multiple jyutping eg: liu4 ge3, or single jyutping eg: liu4
    syllable_pattern = re.compile(r"^([a-z]+[1-6]+[ ]?)+$")

    syllables = ToJyutping.get_jyutping_list(text)

    for word, syllable in syllables:
        if punct_pattern.match(word):
            # Every character of `word` is a punctuation mark here, so
            # emitting the characters one by one is the same as splitting on
            # each mark and dropping the empty pieces.
            jyutping_array.extend(word)
            continue

        # NOTE(review): ToJyutping's README shows None as the reading for
        # characters it cannot convert - confirm against the pinned version.
        # Guard explicitly so the caller gets the intended ValueError rather
        # than a TypeError from the regex engine.
        if syllable is None or not syllable_pattern.search(syllable):
            raise ValueError(f"Failed to convert {word} to jyutping: {syllable}")
        jyutping_array.append(syllable)

    return jyutping_array


def get_bert_feature(text, word2ph):
Expand Down
2 changes: 1 addition & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ LangSegment>=0.2.0
Faster_Whisper
wordsegment
rotary_embedding_torch
pyjyutping
ToJyutping
g2pk2
ko_pron
opencc; sys_platform != 'linux'
Expand Down