Open
Description
IN ORDER TO ASSIST YOU, PLEASE PROVIDE THE FOLLOWING:
Describe the bug
When using speech translation with AutoDetectSourceLanguageConfig, the translator fails to connect. This should be a supported feature, as shown in the GitHub example here: https://github.com/Azure-Samples/cognitive-services-speech-sdk/blob/master/samples/python/console/translation_sample.py#L234-L311
To Reproduce
Download the wav file, then add your speech key and region to the reproduction code shown below. When you run it, you should see a translation-canceled error.
# Azure Cognitive Services Speech SDK (third-party: azure-cognitiveservices-speech).
import azure.cognitiveservices.speech as speechsdk
import time
# ADD KEY/REGION HERE
azure_speech_key = ""  # Azure Speech resource subscription key
azure_speech_region = ""  # Azure Speech resource region, e.g. "westus"
# Sample audio containing both en-US and zh-CN speech; download from the SDK samples repo.
input_file = "./samples_python_console_en-us_zh-cn.wav"
def main():
    """Reproduce the bug: continuous speech translation from a wav file with
    automatic source-language detection (en-US / zh-CN candidates).

    Reads module-level globals ``azure_speech_key``, ``azure_speech_region``
    and ``input_file``. Blocks until the session stops or is canceled.
    Expected (buggy) outcome per the issue: a translation-canceled error.

    NOTE(review): the original paste had lost all indentation; the structure
    below is reconstructed from the statement syntax.
    """
    audio_config = speechsdk.audio.AudioConfig(filename=input_file)
    translation_config = speechsdk.translation.SpeechTranslationConfig(
        subscription=azure_speech_key,
        region=azure_speech_region,
        target_languages=["en", "zh-Hans"],
    )
    # Switch language identification from at-start (default) to continuous mode.
    translation_config.set_property(
        property_id=speechsdk.PropertyId.SpeechServiceConnection_LanguageIdMode,
        value="Continuous",
    )
    # Specify the AutoDetectSourceLanguageConfig, which defines the number of possible languages
    auto_detect_source_language_config = (
        speechsdk.languageconfig.AutoDetectSourceLanguageConfig(
            languages=["en-US", "zh-CN"]
        )
    )
    recognizer = speechsdk.translation.TranslationRecognizer(
        translation_config=translation_config,
        audio_config=audio_config,
        auto_detect_source_language_config=auto_detect_source_language_config,
    )

    def result_callback(evt):
        """callback to display a translation result"""
        if evt.result.reason == speechsdk.ResultReason.TranslatedSpeech:
            # Language detected by the service for this utterance.
            src_lang = evt.result.properties[
                speechsdk.PropertyId.SpeechServiceConnection_AutoDetectSourceLanguageResult
            ]
            print(
                """Recognized:
Detected language: {}
Recognition result: {}
English translation: {}
Chinese translation: {}""".format(
                    src_lang,
                    evt.result.text,
                    evt.result.translations["en"],
                    evt.result.translations["zh-Hans"],
                )
            )
        elif evt.result.reason == speechsdk.ResultReason.RecognizedSpeech:
            print("Recognized:\n {}".format(evt.result.text))
        elif evt.result.reason == speechsdk.ResultReason.NoMatch:
            print("No speech could be recognized: {}".format(evt.result.no_match_details))
        elif evt.result.reason == speechsdk.ResultReason.Canceled:
            print("Translation canceled: {}".format(evt.result.cancellation_details.reason))
            if evt.result.cancellation_details.reason == speechsdk.CancellationReason.Error:
                print(
                    "Error details: {}".format(
                        evt.result.cancellation_details.error_details
                    )
                )

    done = False

    def stop_cb(evt):
        """callback that signals to stop continuous recognition upon receiving an event `evt`"""
        print("CLOSING on {}".format(evt))
        nonlocal done
        done = True

    # connect callback functions to the events fired by the recognizer
    recognizer.session_started.connect(
        lambda evt: print("SESSION STARTED: {}".format(evt))
    )
    recognizer.session_stopped.connect(
        lambda evt: print("SESSION STOPPED {}".format(evt))
    )
    # event for final result
    recognizer.recognized.connect(lambda evt: result_callback(evt))
    # cancellation event
    recognizer.canceled.connect(
        lambda evt: print("CANCELED: {} ({})".format(evt, evt.reason))
    )
    # stop continuous recognition on either session stopped or canceled events
    recognizer.session_stopped.connect(stop_cb)
    recognizer.canceled.connect(stop_cb)
    # start translation
    recognizer.start_continuous_recognition()
    while not done:
        time.sleep(0.5)
    recognizer.stop_continuous_recognition()


if __name__ == "__main__":
    main()
Metadata
Metadata
Assignees
Labels
No labels