-
Notifications
You must be signed in to change notification settings - Fork 12
Open
Description
Functions similar to: https://azure.microsoft.com/en-us/services/cognitive-services/text-to-speech/#features
I made a simple sketch based on this page and included a block of reference data below:
# Role names that role-play capable voices can be asked to imitate.
# The same list object is shared by every voice entry that references it.
roleList = [
    "Default",
    "YoungAdultFemale",
    "YoungAdultMale",
    "OlderAdultFemale",
    "OlderAdultMale",
    "SeniorFemale",
    "SeniorMale",
    "Girl",
    "Boy",
]

# Voice catalogue, keyed by locale and then by the full voice identifier.
# Each entry carries:
#   name          - short display name of the voice
#   style         - list of speaking styles, or None when no list is given
#   role          - roleList when role-play is listed for the voice, else None
#   style_support - free-text description of the voice
voices = {
    "zh-CN": {
        "zh-CN-YunyeNeural": {
            "name": "Yunye",
            "style": [
                "General",
                "angry",
                "calm",
                "cheerful",
                "disgruntled",
                "embarrassed",
                "fearful",
                "sad",
                "serious",
            ],
            "role": roleList,
            "style_support": "Optimized for story narrating, multiple role-play and styles available using SSML",
        },
        "zh-CN-XiaoxiaoNeural": {
            "name": "Xiaoxiao",
            "style": [
                "General",
                "affectionate",
                "angry",
                "assistant",
                "calm",
                "chat",
                "cheerful",
                "customerservice",
                "disgruntled",
                "fearful",
                "gentle",
                "lyrical",
                "newscast",
                "sad",
                "serious",
            ],
            "role": None,
            "style_support": "General, multiple voice styles available using SSML",
        },
        "zh-CN-XiaohanNeural": {
            "name": "Xiaohan",
            "style": [
                "General",
                "affectionate",
                "angry",
                "calm",
                "cheerful",
                "disgruntled",
                "embarrassed",
                "fearful",
                "gentle",
                "sad",
                "serious",
            ],
            "role": None,
            "style_support": "General, multiple styles available using SSML",
        },
        "zh-CN-XiaoruiNeural": {
            "name": "Xiaorui",
            "style": ["General", "angry", "calm", "fearful", "sad"],
            "role": None,
            "style_support": "Senior voice, multiple styles available using SSML",
        },
        "zh-CN-XiaomoNeural": {
            "name": "Xiaomo",
            "style": [
                "General",
                "affectionate",
                "angry",
                "calm",
                "cheerful",
                "depressed",
                "disgruntled",
                "embarrassed",
                "envious",
                "fearful",
                "gentle",
                "sad",
                "serious",
            ],
            "role": roleList,
            "style_support": "General, multiple role-play and styles available using SSML",
        },
        "zh-CN-XiaochenNeural": {
            "name": "Xiaochen",
            "style": None,
            "role": None,
            "style_support": "Optimized for spontaneous conversation",
        },
        "zh-CN-XiaoqiuNeural": {
            "name": "Xiaoqiu",
            "style": None,
            "role": None,
            "style_support": "Optimized for narrating",
        },
    },
    "en-US": {
        "en-US-GuyNeural": {
            "name": "Guy",
            "style": ["General", "newscast"],
            "role": None,
            "style_support": "General, multiple voice styles available using SSML",
        },
        "en-US-AriaNeural": {
            "name": "Aria",
            "style": [
                "General",
                "chat",
                "cheerful",
                "customerservice",
                "empathetic",
                "narration-professional",
                "newscast-casual",
                "newscast-formal",
            ],
            "role": None,
            "style_support": "General, multiple voice styles available using SSML",
        },
        "en-US-JennyNeural": {
            "name": "Jenny",
            "style": ["General", "assistant", "chat", "customerservice", "newscast"],
            "role": None,
            "style_support": "General, multiple voice styles available using SSML",
        },
        "en-US-SaraNeural": {
            "name": "Sara",
            "style": ["General", "angry", "cheerful", "sad"],
            "role": None,
            "style_support": "General, multiple voice styles available using SSML",
        },
    },
}
# role and style data from: https://azure.microsoft.com/en-us/services/cognitive-services/text-to-speech/#features
# style_support data from: https://docs.microsoft.com/en-us/azure/cognitive-services/speech-service/language-support#text-to-speech
Support launching with command-line arguments: srt_file, out_file, voice.
An SSML template needs to be built in, e.g.:
<!-- SSML request template: a single voice element wrapping an mstts:express-as
     element (role, style, styledegree) and a prosody element (pitch, rate, volume).
     Names written in curly braces below are placeholders to be substituted before use.
     NOTE(review): presumably filled via string formatting; if so, the text value
     should be XML-escaped first. TODO confirm against the caller. -->
<speak version="1.0" xmlns="http://www.w3.org/2001/10/synthesis"
xmlns:mstts="https://www.w3.org/2001/mstts" xml:lang="{lang}">
<voice name="{voice}">
<mstts:express-as role="{role}" style="{style}" styledegree="{styledegree}">
<prosody pitch="{pitch}" rate="{rate}" volume="{volume}">
{text}
</prosody>
</mstts:express-as>
</voice>
</speak>
Metadata
Metadata
Assignees
Labels
No labels