Skip to content

Autodetect languages not working with continuous speech translation #2875

Open
@Tryptophan

Description

@Tryptophan

IN ORDER TO ASSIST YOU, PLEASE PROVIDE THE FOLLOWING:

Describe the bug

When using speech translation with AutoDetectSourceLanguageConfig, the translator fails to connect. This should be a supported feature, as shown in the GitHub sample here: https://github.com/Azure-Samples/cognitive-services-speech-sdk/blob/master/samples/python/console/translation_sample.py#L234-L311

To Reproduce

Download the WAV file, then add your speech key and region to the reproduction code shown below. When you run it, you should see a translation canceled error.

import azure.cognitiveservices.speech as speechsdk
import time

# Fill in the Speech resource key and region before running. Both are empty
# placeholders here and are passed to SpeechTranslationConfig in main().
azure_speech_key = ""
azure_speech_region = ""

# Audio file that main() feeds to the recognizer via AudioConfig(filename=...).
input_file = "./samples_python_console_en-us_zh-cn.wav"

def main():
    """Run continuous speech translation on `input_file` with language auto-detection.

    Builds a TranslationRecognizer that reads audio from `input_file`,
    translates into "en" and "zh-Hans", and auto-detects the source language
    among "en-US" and "zh-CN" with the language-ID mode set to "Continuous".
    Recognizer events are printed to stdout, and the function returns after a
    session-stopped or canceled event has been received.
    """

    # Audio comes from the file named by the module-level `input_file`.
    audio_config = speechsdk.audio.AudioConfig(filename=input_file)

    # NOTE(review): the config is built from subscription + region. Verify
    # against the SDK sample linked in the report whether language ID combined
    # with translation needs an explicit `endpoint=` instead -- this cannot be
    # confirmed from this snippet alone.
    translation_config = speechsdk.translation.SpeechTranslationConfig(
        subscription=azure_speech_key,
        region=azure_speech_region,
        target_languages=["en", "zh-Hans"],
    )


    # Request continuous language identification.
    translation_config.set_property(
        property_id=speechsdk.PropertyId.SpeechServiceConnection_LanguageIdMode,
        value="Continuous",
    )

    # Candidate source languages for auto-detection: en-US and zh-CN.
    auto_detect_source_language_config = (
        speechsdk.languageconfig.AutoDetectSourceLanguageConfig(
            languages=["en-US", "zh-CN"]
        )
    )

    recognizer = speechsdk.translation.TranslationRecognizer(
        translation_config=translation_config,
        audio_config=audio_config,
        auto_detect_source_language_config=auto_detect_source_language_config,
    )


    def result_callback(evt):
        """Print the outcome carried by a `recognized` event.

        Branches on `evt.result.reason`: a translated result prints the
        detected source language, the recognized text and both translations;
        the other branches print the recognized text, the no-match details, or
        the cancellation reason (plus error details when the reason is Error).
        """
        if evt.result.reason == speechsdk.ResultReason.TranslatedSpeech:
            # The detected source language is read from the result properties.
            src_lang = evt.result.properties[
                speechsdk.PropertyId.SpeechServiceConnection_AutoDetectSourceLanguageResult
            ]
            print(
                """Recognized:
            Detected language: {}
            Recognition result: {}
            English translation: {}
            Chinese translation: {}""".format(
                    src_lang,
                    evt.result.text,
                    evt.result.translations["en"],
                    evt.result.translations["zh-Hans"],
                )
            )
        elif evt.result.reason == speechsdk.ResultReason.RecognizedSpeech:
            print("Recognized:\n {}".format(evt.result.text))
        elif evt.result.reason == speechsdk.ResultReason.NoMatch:
            print("No speech could be recognized: {}".format(evt.result.no_match_details))
        elif evt.result.reason == speechsdk.ResultReason.Canceled:
            # NOTE(review): this callback is only connected to `recognized`
            # below, so this branch runs only if such an event carries a
            # Canceled reason -- confirm whether that can happen.
            print("Translation canceled: {}".format(evt.result.cancellation_details.reason))
            if evt.result.cancellation_details.reason == speechsdk.CancellationReason.Error:
                print(
                    "Error details: {}".format(
                        evt.result.cancellation_details.error_details
                    )
                )


    # Flag polled by the wait loop at the end; set to True by stop_cb.
    done = False


    def stop_cb(evt):
        """Print the received event and set `done` so the wait loop in main() exits."""
        print("CLOSING on {}".format(evt))
        nonlocal done
        done = True

    # Connect callback functions to the events fired by the recognizer.
    recognizer.session_started.connect(
        lambda evt: print("SESSION STARTED: {}".format(evt))
    )
    recognizer.session_stopped.connect(
        lambda evt: print("SESSION STOPPED {}".format(evt))
    )

    # Final results are routed to result_callback.
    recognizer.recognized.connect(lambda evt: result_callback(evt))

    # Cancellation event.
    # NOTE(review): this handler prints only the event and `evt.reason`; no
    # error details are printed here, so the cause of a cancellation may not be
    # visible in the output -- consider also printing the cancellation details.
    recognizer.canceled.connect(
        lambda evt: print("CANCELED: {} ({})".format(evt, evt.reason))
    )

    # Stop continuous recognition on either session stopped or canceled events.
    # These are connected after the printing handlers above.
    recognizer.session_stopped.connect(stop_cb)
    recognizer.canceled.connect(stop_cb)

    # Start translation.
    recognizer.start_continuous_recognition()

    # Poll every 0.5 s until stop_cb flips `done`.
    while not done:
        time.sleep(0.5)

    recognizer.stop_continuous_recognition()

# Run the reproduction only when this file is executed as a script.
if __name__ == "__main__":
    main()

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions