Skip to content

Commit

Permalink
update Python batch sample to use v3 API (#736)
Browse files Browse the repository at this point in the history
  • Loading branch information
chlandsi authored Jul 21, 2020
1 parent 16c980b commit 2e09feb
Show file tree
Hide file tree
Showing 2 changed files with 142 additions and 93 deletions.
19 changes: 4 additions & 15 deletions samples/batch/python/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,25 +8,13 @@ Follow these steps for the installation:

1. Go to https://editor.swagger.io.
1. Click **File**, then click **Import URL**.
1. Enter the Swagger URL for the Speech Services API: `https://westus.dev.cognitive.microsoft.com/docs/services/speech-to-text-api-v3-0/export?DocumentFormat=Swagger&ApiName=Speech%20to%20Text%20API%20v3.0`.
1. Click **Generate Client** and select **Python**.
1. Save the client library.
1. Extract the downloaded python-client-generated.zip somewhere in your file system.
1. Install the extracted python-client module in your Python environment using pip: `pip install path/to/package/python-client`.
1. The installed package has the name `swagger_client`. You can check that the installation worked using the command `python -c "import swagger_client"`.

> **Note:**
> Due to a [known bug in the Swagger autogeneration](https://github.com/swagger-api/swagger-codegen/issues/7541), you might encounter errors on importing the `swagger_client` package.
> These can be fixed by deleting the line with the content
> ```py
> from swagger_client.models.model import Model # noqa: F401,E501
> ```
> from the file `swagger_client/models/model.py` and the line with the content
> ```py
> from swagger_client.models.inner_error import InnerError # noqa: F401,E501
> ```
> from the file `swagger_client/models/inner_error.py` inside the installed package. The error message will tell you where these files are located for your installation.
## Install other dependencies

The sample uses the `requests` library. You can install it with the command
Expand All @@ -37,12 +25,13 @@ pip install requests

## Run the sample code

The sample code itself is [main.py](python-client/main.py) and can be run using Python 3.7 or higher.
You will need to adapt the following information to run the sample:

1. Your subscription key and region.
1. The URI of an audio recording in blob storage.
1. (Optional:) The model ID of an adapted model, if you want to use a custom model.
1. (Optional:) The URI of a container with audio files if you want to transcribe all of them with a single request.

You can use a development environment like PyCharm to edit, debug, and execute the sample.

216 changes: 138 additions & 78 deletions samples/batch/python/python-client/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,16 +4,14 @@
# Copyright (c) Microsoft. All rights reserved.
# Licensed under the MIT license. See LICENSE.md file in the project root for full license information.

from typing import List

import logging
import sys
import requests
import time
import swagger_client as cris_client


# Configure logging once for the whole sample: timestamped messages to stdout.
# (Only one basicConfig call may exist — a second call would be a silent no-op.)
logging.basicConfig(stream=sys.stdout, level=logging.DEBUG,
                    format="%(asctime)s %(message)s", datefmt="%m/%d/%Y %I:%M:%S %p %Z")

# Your subscription key and region for the speech service
SUBSCRIPTION_KEY = "YourSubscriptionKey"
Expand All @@ -25,118 +23,180 @@
LOCALE = "en-US"
RECORDINGS_BLOB_URI = "<Your SAS Uri to the recording>"

# Provide the uri of a container with audio files for transcribing all of them with a single request
RECORDINGS_CONTAINER_URI = "<Your SAS Uri to a container of audio files>"

# Set model information when doing transcription with custom models.
# (The v2-era ADAPTED_ACOUSTIC_ID / ADAPTED_LANGUAGE_ID globals are gone; the
# v3 API takes a single custom-model reference.)
MODEL_REFERENCE = None  # guid of a custom model


def transcribe_from_single_blob(uri, properties):
    """
    Build a transcription definition for the single audio file at `uri`,
    using the base model of the configured locale and the settings in
    `properties`.
    """
    return cris_client.Transcription(
        display_name=NAME,
        description=DESCRIPTION,
        locale=LOCALE,
        content_urls=[uri],
        properties=properties,
    )


def transcribe_with_custom_model(api, uri, properties):
    """
    Build a transcription definition for the single audio file at `uri`,
    using the custom model identified by the module-level MODEL_REFERENCE
    and the settings in `properties`.
    """
    # MODEL_REFERENCE must be set above when transcribing with a custom model.
    if MODEL_REFERENCE is None:
        logging.error("Custom model ids must be set when using custom models")
        # Exit with a non-zero status so the failure is visible to callers/CI.
        sys.exit(1)

    # Resolve the custom model from the service so it can be attached below.
    model = api.get_model(MODEL_REFERENCE)

    transcription_definition = cris_client.Transcription(
        display_name=NAME,
        description=DESCRIPTION,
        locale=LOCALE,
        content_urls=[uri],
        model=model,
        properties=properties
    )

    return transcription_definition


def transcribe_from_container(uri, properties):
    """
    Build a transcription definition covering every audio file in the
    container at `uri`, using the base model of the configured locale and
    the settings in `properties`.
    """
    return cris_client.Transcription(
        display_name=NAME,
        description=DESCRIPTION,
        locale=LOCALE,
        content_container_url=uri,
        properties=properties,
    )


def _paginate(api, paginated_object):
"""
The autogenerated client does not support pagination. This function returns a generator over
all items of the array that the paginated object `paginated_object` is part of.
"""
yield from paginated_object.values
typename = type(paginated_object).__name__
auth_settings = ["apiKeyHeader", "apiKeyQuery"]
while paginated_object.next_link:
link = paginated_object.next_link[len(api.api_client.configuration.host):]
paginated_object, status, headers = api.api_client.call_api(link, "GET",
response_type=typename, auth_settings=auth_settings)

if status == 200:
yield from paginated_object.values
else:
raise Exception(f"could not receive paginated data: status {status}")


def delete_all_transcriptions(api):
    """
    Delete every transcription associated with this speech resource.
    Transcriptions that are still running or not started will not be deleted.
    """
    logging.info("Deleting all existing completed transcriptions.")

    # Materialize the full list up front so deletions cannot disturb pagination.
    all_transcriptions = list(_paginate(api, api.get_transcriptions()))

    for item in all_transcriptions:
        # The transcription id is the last segment of its self-link.
        transcription_id = item._self.split('/')[-1]
        logging.debug(f"Deleting transcription with id {transcription_id}")
        try:
            api.delete_transcription(transcription_id)
        except cris_client.rest.ApiException as exc:
            logging.error(f"Could not delete transcription {transcription_id}: {exc}")


def transcribe():
    """
    Submit a batch transcription request through the v3 speech-to-text API,
    poll it until it reaches a terminal state, and log the results.

    Reads SUBSCRIPTION_KEY, SERVICE_REGION and RECORDINGS_BLOB_URI (and
    optionally MODEL_REFERENCE / RECORDINGS_CONTAINER_URI) from module-level
    configuration.
    """
    logging.info("Starting transcription client...")

    # Configure API key authorization (subscription key) and the regional
    # v3.0 endpoint.
    configuration = cris_client.Configuration()
    configuration.api_key["Ocp-Apim-Subscription-Key"] = SUBSCRIPTION_KEY
    configuration.host = f"https://{SERVICE_REGION}.api.cognitive.microsoft.com/speechtotext/v3.0"

    # Create the client object and authenticate.
    client = cris_client.ApiClient(configuration)

    # Create an instance of the transcription api class.
    api = cris_client.DefaultApi(api_client=client)

    # Specify transcription properties by passing a dict to the properties parameter. See
    # https://docs.microsoft.com/azure/cognitive-services/speech-service/batch-transcription#configuration-properties
    # for supported parameters.
    properties = {
        # "punctuationMode": "DictatedAndAutomatic",
        # "profanityFilterMode": "Masked",
        # "wordLevelTimestampsEnabled": True,
        # "diarizationEnabled": True,
        # "destinationContainerUrl": "<results container>",
        # "timeToLive": "PT1H"
    }

    # Use base models for transcription. Comment this block if you are using a custom model.
    transcription_definition = transcribe_from_single_blob(RECORDINGS_BLOB_URI, properties)

    # Uncomment this block to use custom models for transcription.
    # transcription_definition = transcribe_with_custom_model(api, RECORDINGS_BLOB_URI, properties)

    # Uncomment this block to transcribe all files from a container.
    # transcription_definition = transcribe_from_container(RECORDINGS_CONTAINER_URI, properties)

    created_transcription, status, headers = api.create_transcription_with_http_info(transcription=transcription_definition)

    # Get the transcription id from the location URI returned by the service.
    transcription_id = headers["location"].split("/")[-1]

    # Log information about the created transcription. If you should ask for support, please
    # include this information.
    logging.info(f"Created new transcription with id '{transcription_id}' in region {SERVICE_REGION}")

    logging.info("Checking status.")

    completed = False

    while not completed:
        # Wait for 5 seconds before refreshing the transcription status.
        time.sleep(5)

        transcription = api.get_transcription(transcription_id)
        logging.info(f"Transcriptions status: {transcription.status}")

        if transcription.status in ("Failed", "Succeeded"):
            completed = True

        if transcription.status == "Succeeded":
            # Fetch the result files of the transcription; only entries of
            # kind "Transcription" carry recognized text.
            pag_files = api.get_transcription_files(transcription_id)
            for file_data in _paginate(api, pag_files):
                if file_data.kind != "Transcription":
                    continue

                audiofilename = file_data.name
                results_url = file_data.links.content_url
                results = requests.get(results_url)
                logging.info(f"Results for {audiofilename}:\n{results.content.decode('utf-8')}")
        elif transcription.status == "Failed":
            logging.info(f"Transcription failed: {transcription.properties.error.message}")


if __name__ == "__main__":
transcribe()

0 comments on commit 2e09feb

Please sign in to comment.