# Copyright (c) Microsoft. All rights reserved.
# Licensed under the MIT license. See LICENSE.md file in the project root for full license information.

-from typing import List
-
import logging
import sys
import requests
import time
import swagger_client as cris_client

-
-logging.basicConfig(stream=sys.stdout, level=logging.DEBUG, format="%(message)s")
+logging.basicConfig(stream=sys.stdout, level=logging.DEBUG,
+                    format="%(asctime)s %(message)s", datefmt="%m/%d/%Y %I:%M:%S %p %Z")

# Your subscription key and region for the speech service
SUBSCRIPTION_KEY = "YourSubscriptionKey"

LOCALE = "en-US"
RECORDINGS_BLOB_URI = "<Your SAS Uri to the recording>"

-# Set subscription information when doing transcription with custom models
-ADAPTED_ACOUSTIC_ID = None  # guid of a custom acoustic model
-ADAPTED_LANGUAGE_ID = None  # guid of a custom language model
+# Provide the URI of a container with audio files to transcribe all of them with a single request
+RECORDINGS_CONTAINER_URI = "<Your SAS Uri to a container of audio files>"
+
+# Set model information when doing transcription with custom models
+MODEL_REFERENCE = None  # guid of a custom model
+
+
+def transcribe_from_single_blob(uri, properties):
+    """
+    Transcribe a single audio file located at `uri` using the settings specified in `properties`
+    using the base model for the specified locale.
+    """
+    transcription_definition = cris_client.Transcription(
+        display_name=NAME,
+        description=DESCRIPTION,
+        locale=LOCALE,
+        content_urls=[uri],
+        properties=properties
+    )
+
+    return transcription_definition
+
+
+def transcribe_with_custom_model(api, uri, properties):
+    """
+    Transcribe a single audio file located at `uri` using the settings specified in `properties`
+    using the custom model identified by `MODEL_REFERENCE`.
+    """
+    # Model information (MODEL_REFERENCE) must be set above.
+    if MODEL_REFERENCE is None:
+        logging.error("Custom model id must be set when using a custom model")
+        sys.exit()
+
+    model = api.get_model(MODEL_REFERENCE)
+
+    transcription_definition = cris_client.Transcription(
+        display_name=NAME,
+        description=DESCRIPTION,
+        locale=LOCALE,
+        content_urls=[uri],
+        model=model,
+        properties=properties
+    )
+
+    return transcription_definition
+
+
+def transcribe_from_container(uri, properties):
+    """
+    Transcribe all files in the container located at `uri` using the settings specified in `properties`
+    using the base model for the specified locale.
+    """
+    transcription_definition = cris_client.Transcription(
+        display_name=NAME,
+        description=DESCRIPTION,
+        locale=LOCALE,
+        content_container_url=uri,
+        properties=properties
+    )
+
+    return transcription_definition
+
+
+def _paginate(api, paginated_object):
+    """
+    The autogenerated client does not support pagination. This function returns a generator over
+    all items of the array that the paginated object `paginated_object` is part of.
+    """
+    yield from paginated_object.values
+    typename = type(paginated_object).__name__
+    auth_settings = ["apiKeyHeader", "apiKeyQuery"]
+    while paginated_object.next_link:
+        link = paginated_object.next_link[len(api.api_client.configuration.host):]
+        paginated_object, status, headers = api.api_client.call_api(link, "GET",
+            response_type=typename, auth_settings=auth_settings)
+
+        if status == 200:
+            yield from paginated_object.values
+        else:
+            raise Exception(f"could not receive paginated data: status {status}")
+
+
+def delete_all_transcriptions(api):
+    """
+    Delete all transcriptions associated with your speech resource.
+    """
+    logging.info("Deleting all existing completed transcriptions.")
+
+    # get all transcriptions for the subscription
+    transcriptions = list(_paginate(api, api.get_transcriptions()))
+
+    # Delete all pre-existing completed transcriptions.
+    # If transcriptions are still running or not started, they will not be deleted.
+    for transcription in transcriptions:
+        transcription_id = transcription._self.split('/')[-1]
+        logging.debug(f"Deleting transcription with id {transcription_id}")
+        try:
+            api.delete_transcription(transcription_id)
+        except cris_client.rest.ApiException as exc:
+            logging.error(f"Could not delete transcription {transcription_id}: {exc}")
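# Note: this helper is not invoked by transcribe() below; call it yourself if you want to
# clean up transcriptions left over from earlier runs.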


def transcribe():
    logging.info("Starting transcription client...")

    # configure API key authorization: subscription_key
    configuration = cris_client.Configuration()
-    configuration.api_key['Ocp-Apim-Subscription-Key'] = SUBSCRIPTION_KEY
-    configuration.host = "https://{}.cris.ai".format(SERVICE_REGION)
+    configuration.api_key["Ocp-Apim-Subscription-Key"] = SUBSCRIPTION_KEY
+    configuration.host = f"https://{SERVICE_REGION}.api.cognitive.microsoft.com/speechtotext/v3.0"
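    # The v3.0 batch transcription API is served from the regional Cognitive Services host
    # configured above, replacing the older <region>.cris.ai endpoint.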

    # create the client object and authenticate
    client = cris_client.ApiClient(configuration)

    # create an instance of the transcription api class
-    transcription_api = cris_client.CustomSpeechTranscriptionsApi(api_client=client)
-
-    # get all transcriptions for the subscription
-    transcriptions: List[cris_client.Transcription] = transcription_api.get_transcriptions()
-
-    logging.info("Deleting all existing completed transcriptions.")
-
-    # delete all pre-existing completed transcriptions
-    # if transcriptions are still running or not started, they will not be deleted
-    for transcription in transcriptions:
-        try:
-            transcription_api.delete_transcription(transcription.id)
-        except ValueError:
-            # ignore swagger error on empty response message body: https://github.com/swagger-api/swagger-core/issues/2446
-            pass
+    api = cris_client.DefaultApi(api_client=client)
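    # With the v3 swagger client, every operation used in this sample (models, transcriptions,
    # files) is exposed on DefaultApi rather than on per-resource classes such as
    # CustomSpeechTranscriptionsApi.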

    # Specify transcription properties by passing a dict to the properties parameter. See
    # https://docs.microsoft.com/azure/cognitive-services/speech-service/batch-transcription#configuration-properties
    # for supported parameters.
    properties = {
-        # 'PunctuationMode': 'DictatedAndAutomatic',
-        # 'ProfanityFilterMode': 'Masked',
-        # 'AddWordLevelTimestamps': 'False',
-        # 'AddDiarization': 'False',
-        # 'AddSentiment': False,
-        # 'TranscriptionResultsContainerUrl': "<results container>"
+        # "punctuationMode": "DictatedAndAutomatic",
+        # "profanityFilterMode": "Masked",
+        # "wordLevelTimestampsEnabled": True,
+        # "diarizationEnabled": True,
+        # "destinationContainerUrl": "<results container>",
+        # "timeToLive": "PT1H"
    }
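    # "timeToLive" is an ISO 8601 duration; "PT1H" asks the service to remove the finished
    # transcription after one hour. "destinationContainerUrl" should point to a writable
    # container SAS URL if you want the result files written to your own storage.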

    # Use base models for transcription. Comment this block if you are using a custom model.
-    transcription_definition = cris_client.TranscriptionDefinition(
-        name=NAME, description=DESCRIPTION, locale=LOCALE, recordings_url=RECORDINGS_BLOB_URI,
-        properties=properties
-    )
+    transcription_definition = transcribe_from_single_blob(RECORDINGS_BLOB_URI, properties)

    # Uncomment this block to use custom models for transcription.
-    # Model information (ADAPTED_ACOUSTIC_ID and ADAPTED_LANGUAGE_ID) must be set above.
-    # if ADAPTED_ACOUSTIC_ID is None or ADAPTED_LANGUAGE_ID is None:
-    #     logging.info("Custom model ids must be set to when using custom models")
-    # transcription_definition = cris_client.TranscriptionDefinition(
-    #     name=NAME, description=DESCRIPTION, locale=LOCALE, recordings_url=RECORDINGS_BLOB_URI,
-    #     models=[cris_client.ModelIdentity(ADAPTED_ACOUSTIC_ID), cris_client.ModelIdentity(ADAPTED_LANGUAGE_ID)],
-    #     properties=properties
-    # )
+    # transcription_definition = transcribe_with_custom_model(api, RECORDINGS_BLOB_URI, properties)

-    data, status, headers = transcription_api.create_transcription_with_http_info(transcription_definition)
+    # Uncomment this block to transcribe all files from a container.
+    # transcription_definition = transcribe_from_container(RECORDINGS_CONTAINER_URI, properties)

-    # extract transcription location from the headers
-    transcription_location: str = headers["location"]
+    created_transcription, status, headers = api.create_transcription_with_http_info(transcription=transcription_definition)

    # get the transcription Id from the location URI
-    created_transcription: str = transcription_location.split('/')[-1]
+    transcription_id = headers["location"].split("/")[-1]

-    logging.info("Created new transcription with id {}".format(created_transcription))
+    # Log information about the created transcription. If you need to contact support, please
+    # include this information.
+    logging.info(f"Created new transcription with id '{transcription_id}' in region {SERVICE_REGION}")

    logging.info("Checking status.")

    completed = False

    while not completed:
-        running, not_started = 0, 0
+        # wait for 5 seconds before refreshing the transcription status
+        time.sleep(5)

-        # get all transcriptions for the user
-        transcriptions: List[cris_client.Transcription] = transcription_api.get_transcriptions()
+        transcription = api.get_transcription(transcription_id)
+        logging.info(f"Transcription status: {transcription.status}")

-        # for each transcription in the list we check the status
-        for transcription in transcriptions:
-            if transcription.status in ("Failed", "Succeeded"):
-                # we check to see if it was the transcription we created from this client
-                if created_transcription != transcription.id:
-                    continue
+        if transcription.status in ("Failed", "Succeeded"):
+            completed = True

-                completed = True
-
-                if transcription.status == "Succeeded":
-                    results_uri = transcription.results_urls["channel_0"]
-                    results = requests.get(results_uri)
-                    logging.info("Transcription succeeded. Results: ")
-                    logging.info(results.content.decode("utf-8"))
-                else:
-                    logging.info("Transcription failed :{}.".format(transcription.status_message))
-                    break
-            elif transcription.status == "Running":
-                running += 1
-            elif transcription.status == "NotStarted":
-                not_started += 1
-
-        logging.info("Transcriptions status: "
-                     "completed (this transcription): {}, {} running, {} not started yet".format(
-                         completed, running, not_started))
-
-        # wait for 5 seconds
-        time.sleep(5)
+        if transcription.status == "Succeeded":
+            pag_files = api.get_transcription_files(transcription_id)
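            # Files of kind "Transcription" hold the recognition results for one audio file
            # as JSON; their links.content_url should be a pre-authorized URL, so a plain GET
            # below is enough to download the content.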
+            for file_data in _paginate(api, pag_files):
+                if file_data.kind != "Transcription":
+                    continue

-    input("Press any key...")
+                audiofilename = file_data.name
+                results_url = file_data.links.content_url
+                results = requests.get(results_url)
+                logging.info(f"Results for {audiofilename}:\n{results.content.decode('utf-8')}")
+        elif transcription.status == "Failed":
+            logging.info(f"Transcription failed: {transcription.properties.error.message}")


if __name__ == "__main__":
    transcribe()
+