diff --git a/samples/batch/python/README.md b/samples/batch/python/README.md index 5e5377e2f..409b99173 100644 --- a/samples/batch/python/README.md +++ b/samples/batch/python/README.md @@ -8,25 +8,13 @@ Follow these steps for the installation: 1. Go to https://editor.swagger.io. 1. Click **File**, then click **Import URL**. -1. Enter the Swagger URL including the region for your Speech Services subscription: `https://.cris.ai/docs/v2.0/swagger`. +1. Enter the Swagger URL for the Speech Services API: `https://westus.dev.cognitive.microsoft.com/docs/services/speech-to-text-api-v3-0/export?DocumentFormat=Swagger&ApiName=Speech%20to%20Text%20API%20v3.0`. 1. Click **Generate Client** and select **Python**. 1. Save the client library. 1. Extract the downloaded python-client-generated.zip somewhere in your file system. 1. Install the extracted python-client module in your Python environment using pip: `pip install path/to/package/python-client`. 1. The installed package has the name `swagger_client`. You can check that the installation worked using the command `python -c "import swagger_client"`. -> **Note:** -> Due to a [known bug in the Swagger autogeneration](https://github.com/swagger-api/swagger-codegen/issues/7541), you might encounter errors on importing the `swagger_client` package. -> These can be fixed by deleting the line with the content -> ```py -> from swagger_client.models.model import Model # noqa: F401,E501 -> ``` -> from the file `swagger_client/models/model.py` and the line with the content -> ```py -> from swagger_client.models.inner_error import InnerError # noqa: F401,E501 -> ``` -> from the file `swagger_client/models/inner_error.py` inside the installed package. The error message will tell you where these files are located for your installation. - ## Install other dependencies The sample uses the `requests` library. 
You can install it with the command @@ -37,12 +25,13 @@ pip install requests ## Run the sample code -The sample code itself is [main.py](python-client/main.py) and can be run using Python 3.5 or higher. +The sample code itself is [main.py](python-client/main.py) and can be run using Python 3.7 or higher. You will need to adapt the following information to run the sample: 1. Your subscription key and region. 1. The URI of an audio recording in blob storage. -1. (Optional:) The model IDs of both an adapted acoustic and language model, if you want to use a custom model. +1. (Optional:) The model ID of an adapted model, if you want to use a custom model. +1. (Optional:) The URI of a container with audio files if you want to transcribe all of them with a single request. You can use a development environment like PyCharm to edit, debug, and execute the sample. diff --git a/samples/batch/python/python-client/main.py b/samples/batch/python/python-client/main.py index c752e8d4a..5dc3942ba 100644 --- a/samples/batch/python/python-client/main.py +++ b/samples/batch/python/python-client/main.py @@ -4,16 +4,14 @@ # Copyright (c) Microsoft. All rights reserved. # Licensed under the MIT license. See LICENSE.md file in the project root for full license information. 
-from typing import List - import logging import sys import requests import time import swagger_client as cris_client - -logging.basicConfig(stream=sys.stdout, level=logging.DEBUG, format="%(message)s") +logging.basicConfig(stream=sys.stdout, level=logging.DEBUG, + format="%(asctime)s %(message)s", datefmt="%m/%d/%Y %I:%M:%S %p %Z") # Your subscription key and region for the speech service SUBSCRIPTION_KEY = "YourSubscriptionKey" @@ -25,9 +23,106 @@ LOCALE = "en-US" RECORDINGS_BLOB_URI = "" -# Set subscription information when doing transcription with custom models -ADAPTED_ACOUSTIC_ID = None # guid of a custom acoustic model -ADAPTED_LANGUAGE_ID = None # guid of a custom language model +# Provide the uri of a container with audio files for transcribing all of them with a single request +RECORDINGS_CONTAINER_URI = "" + +# Set model information when doing transcription with custom models +MODEL_REFERENCE = None # guid of a custom model + + +def transcribe_from_single_blob(uri, properties): + """ + Transcribe a single audio file located at `uri` using the settings specified in `properties` + using the base model for the specified locale. + """ + transcription_definition = cris_client.Transcription( + display_name=NAME, + description=DESCRIPTION, + locale=LOCALE, + content_urls=[uri], + properties=properties + ) + + return transcription_definition + + +def transcribe_with_custom_model(api, uri, properties): + """ + Transcribe a single audio file located at `uri` using the settings specified in `properties` + using a custom model. + """ + # Model information (MODEL_REFERENCE) must be set above.
+ if MODEL_REFERENCE is None: + logging.error("Custom model ids must be set when using custom models") + sys.exit() + + model = api.get_model(MODEL_REFERENCE) + + transcription_definition = cris_client.Transcription( + display_name=NAME, + description=DESCRIPTION, + locale=LOCALE, + content_urls=[uri], + model=model, + properties=properties + ) + + return transcription_definition + + +def transcribe_from_container(uri, properties): + """ + Transcribe all files in the container located at `uri` using the settings specified in `properties` + using the base model for the specified locale. + """ + transcription_definition = cris_client.Transcription( + display_name=NAME, + description=DESCRIPTION, + locale=LOCALE, + content_container_url=uri, + properties=properties + ) + + return transcription_definition + + +def _paginate(api, paginated_object): + """ + The autogenerated client does not support pagination. This function returns a generator over + all items of the array that the paginated object `paginated_object` is part of. + """ + yield from paginated_object.values + typename = type(paginated_object).__name__ + auth_settings = ["apiKeyHeader", "apiKeyQuery"] + while paginated_object.next_link: + link = paginated_object.next_link[len(api.api_client.configuration.host):] + paginated_object, status, headers = api.api_client.call_api(link, "GET", + response_type=typename, auth_settings=auth_settings) + + if status == 200: + yield from paginated_object.values + else: + raise Exception(f"could not receive paginated data: status {status}") + + +def delete_all_transcriptions(api): + """ + Delete all transcriptions associated with your speech resource. + """ + logging.info("Deleting all existing completed transcriptions.") + + # get all transcriptions for the subscription + transcriptions = list(_paginate(api, api.get_transcriptions())) + + # Delete all pre-existing completed transcriptions. + # If transcriptions are still running or not started, they will not be deleted. 
+ for transcription in transcriptions: + transcription_id = transcription._self.split('/')[-1] + logging.debug(f"Deleting transcription with id {transcription_id}") + try: + api.delete_transcription(transcription_id) + except cris_client.rest.ApiException as exc: + logging.error(f"Could not delete transcription {transcription_id}: {exc}") def transcribe(): @@ -35,108 +130,73 @@ def transcribe(): # configure API key authorization: subscription_key configuration = cris_client.Configuration() - configuration.api_key['Ocp-Apim-Subscription-Key'] = SUBSCRIPTION_KEY - configuration.host = "https://{}.cris.ai".format(SERVICE_REGION) + configuration.api_key["Ocp-Apim-Subscription-Key"] = SUBSCRIPTION_KEY + configuration.host = f"https://{SERVICE_REGION}.api.cognitive.microsoft.com/speechtotext/v3.0" # create the client object and authenticate client = cris_client.ApiClient(configuration) # create an instance of the transcription api class - transcription_api = cris_client.CustomSpeechTranscriptionsApi(api_client=client) - - # get all transcriptions for the subscription - transcriptions: List[cris_client.Transcription] = transcription_api.get_transcriptions() - - logging.info("Deleting all existing completed transcriptions.") - - # delete all pre-existing completed transcriptions - # if transcriptions are still running or not started, they will not be deleted - for transcription in transcriptions: - try: - transcription_api.delete_transcription(transcription.id) - except ValueError: - # ignore swagger error on empty response message body: https://github.com/swagger-api/swagger-core/issues/2446 - pass + api = cris_client.DefaultApi(api_client=client) # Specify transcription properties by passing a dict to the properties parameter. See # https://docs.microsoft.com/azure/cognitive-services/speech-service/batch-transcription#configuration-properties # for supported parameters. 
properties = { - # 'PunctuationMode': 'DictatedAndAutomatic', - # 'ProfanityFilterMode': 'Masked', - # 'AddWordLevelTimestamps': 'False', - # 'AddDiarization': 'False', - # 'AddSentiment': False, - # 'TranscriptionResultsContainerUrl': "" + # "punctuationMode": "DictatedAndAutomatic", + # "profanityFilterMode": "Masked", + # "wordLevelTimestampsEnabled": True, + # "diarizationEnabled": True, + # "destinationContainerUrl": "", + # "timeToLive": "PT1H" } # Use base models for transcription. Comment this block if you are using a custom model. - transcription_definition = cris_client.TranscriptionDefinition( - name=NAME, description=DESCRIPTION, locale=LOCALE, recordings_url=RECORDINGS_BLOB_URI, - properties=properties - ) + transcription_definition = transcribe_from_single_blob(RECORDINGS_BLOB_URI, properties) # Uncomment this block to use custom models for transcription. - # Model information (ADAPTED_ACOUSTIC_ID and ADAPTED_LANGUAGE_ID) must be set above. - # if ADAPTED_ACOUSTIC_ID is None or ADAPTED_LANGUAGE_ID is None: - # logging.info("Custom model ids must be set to when using custom models") - # transcription_definition = cris_client.TranscriptionDefinition( - # name=NAME, description=DESCRIPTION, locale=LOCALE, recordings_url=RECORDINGS_BLOB_URI, - # models=[cris_client.ModelIdentity(ADAPTED_ACOUSTIC_ID), cris_client.ModelIdentity(ADAPTED_LANGUAGE_ID)], - # properties=properties - # ) + # transcription_definition = transcribe_with_custom_model(api, RECORDINGS_BLOB_URI, properties) - data, status, headers = transcription_api.create_transcription_with_http_info(transcription_definition) + # Uncomment this block to transcribe all files from a container. 
+ # transcription_definition = transcribe_from_container(RECORDINGS_CONTAINER_URI, properties) - # extract transcription location from the headers - transcription_location: str = headers["location"] + created_transcription, status, headers = api.create_transcription_with_http_info(transcription=transcription_definition) # get the transcription Id from the location URI - created_transcription: str = transcription_location.split('/')[-1] + transcription_id = headers["location"].split("/")[-1] - logging.info("Created new transcription with id {}".format(created_transcription)) + # Log information about the created transcription. If you should ask for support, please + # include this information. + logging.info(f"Created new transcription with id '{transcription_id}' in region {SERVICE_REGION}") logging.info("Checking status.") completed = False while not completed: - running, not_started = 0, 0 + # wait for 5 seconds before refreshing the transcription status + time.sleep(5) - # get all transcriptions for the user - transcriptions: List[cris_client.Transcription] = transcription_api.get_transcriptions() + transcription = api.get_transcription(transcription_id) + logging.info(f"Transcriptions status: {transcription.status}") - # for each transcription in the list we check the status - for transcription in transcriptions: - if transcription.status in ("Failed", "Succeeded"): - # we check to see if it was the transcription we created from this client - if created_transcription != transcription.id: - continue + if transcription.status in ("Failed", "Succeeded"): + completed = True - completed = True - - if transcription.status == "Succeeded": - results_uri = transcription.results_urls["channel_0"] - results = requests.get(results_uri) - logging.info("Transcription succeeded. 
Results: ") - logging.info(results.content.decode("utf-8")) - else: - logging.info("Transcription failed :{}.".format(transcription.status_message)) - break - elif transcription.status == "Running": - running += 1 - elif transcription.status == "NotStarted": - not_started += 1 - - logging.info("Transcriptions status: " - "completed (this transcription): {}, {} running, {} not started yet".format( - completed, running, not_started)) - - # wait for 5 seconds - time.sleep(5) + if transcription.status == "Succeeded": + pag_files = api.get_transcription_files(transcription_id) + for file_data in _paginate(api, pag_files): + if file_data.kind != "Transcription": + continue - input("Press any key...") + audiofilename = file_data.name + results_url = file_data.links.content_url + results = requests.get(results_url) + logging.info(f"Results for {audiofilename}:\n{results.content.decode('utf-8')}") + elif transcription.status == "Failed": + logging.info(f"Transcription failed: {transcription.properties.error.message}") if __name__ == "__main__": transcribe() +