Skip to content

Commit 2e09feb

Browse files
authored
update Python batch sample to use v3 API (#736)
1 parent 16c980b commit 2e09feb

File tree

2 files changed

+142
-93
lines changed

2 files changed

+142
-93
lines changed

samples/batch/python/README.md

Lines changed: 4 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -8,25 +8,13 @@ Follow these steps for the installation:
88

99
1. Go to https://editor.swagger.io.
1010
1. Click **File**, then click **Import URL**.
11-
1. Enter the Swagger URL including the region for your Speech Services subscription: `https://<your-region>.cris.ai/docs/v2.0/swagger`.
11+
1. Enter the Swagger URL for the Speech Services API: `https://westus.dev.cognitive.microsoft.com/docs/services/speech-to-text-api-v3-0/export?DocumentFormat=Swagger&ApiName=Speech%20to%20Text%20API%20v3.0`.
1212
1. Click **Generate Client** and select **Python**.
1313
1. Save the client library.
1414
1. Extract the downloaded python-client-generated.zip somewhere in your file system.
1515
1. Install the extracted python-client module in your Python environment using pip: `pip install path/to/package/python-client`.
1616
1. The installed package has the name `swagger_client`. You can check that the installation worked using the command `python -c "import swagger_client"`.
1717

18-
> **Note:**
19-
> Due to a [known bug in the Swagger autogeneration](https://github.com/swagger-api/swagger-codegen/issues/7541), you might encounter errors on importing the `swagger_client` package.
20-
> These can be fixed by deleting the line with the content
21-
> ```py
22-
> from swagger_client.models.model import Model # noqa: F401,E501
23-
> ```
24-
> from the file `swagger_client/models/model.py` and the line with the content
25-
> ```py
26-
> from swagger_client.models.inner_error import InnerError # noqa: F401,E501
27-
> ```
28-
> from the file `swagger_client/models/inner_error.py` inside the installed package. The error message will tell you where these files are located for your installation.
29-
3018
## Install other dependencies
3119

3220
The sample uses the `requests` library. You can install it with the command
@@ -37,12 +25,13 @@ pip install requests
3725

3826
## Run the sample code
3927

40-
The sample code itself is [main.py](python-client/main.py) and can be run using Python 3.5 or higher.
28+
The sample code itself is [main.py](python-client/main.py) and can be run using Python 3.7 or higher.
4129
You will need to adapt the following information to run the sample:
4230

4331
1. Your subscription key and region.
4432
1. The URI of an audio recording in blob storage.
45-
1. (Optional:) The model IDs of both an adapted acoustic and language model, if you want to use a custom model.
33+
1. (Optional:) The model ID of an adapted model, if you want to use a custom model.
34+
1. (Optional:) The URI of a container with audio files, if you want to transcribe all of them with a single request.
4635

4736
You can use a development environment like PyCharm to edit, debug, and execute the sample.
4837

samples/batch/python/python-client/main.py

Lines changed: 138 additions & 78 deletions
Original file line numberDiff line numberDiff line change
@@ -4,16 +4,14 @@
44
# Copyright (c) Microsoft. All rights reserved.
55
# Licensed under the MIT license. See LICENSE.md file in the project root for full license information.
66

7-
from typing import List
8-
97
import logging
108
import sys
119
import requests
1210
import time
1311
import swagger_client as cris_client
1412

15-
16-
logging.basicConfig(stream=sys.stdout, level=logging.DEBUG, format="%(message)s")
13+
logging.basicConfig(stream=sys.stdout, level=logging.DEBUG,
14+
format="%(asctime)s %(message)s", datefmt="%m/%d/%Y %I:%M:%S %p %Z")
1715

1816
# Your subscription key and region for the speech service
1917
SUBSCRIPTION_KEY = "YourSubscriptionKey"
@@ -25,118 +23,180 @@
2523
LOCALE = "en-US"
2624
RECORDINGS_BLOB_URI = "<Your SAS Uri to the recording>"
2725

28-
# Set subscription information when doing transcription with custom models
29-
ADAPTED_ACOUSTIC_ID = None # guid of a custom acoustic model
30-
ADAPTED_LANGUAGE_ID = None # guid of a custom language model
26+
# Provide the uri of a container with audio files for transcribing all of them with a single request
27+
RECORDINGS_CONTAINER_URI = "<Your SAS Uri to a container of audio files>"
28+
29+
# Set model information when doing transcription with custom models
30+
MODEL_REFERENCE = None # guid of a custom model
31+
32+
33+
def transcribe_from_single_blob(uri, properties):
    """
    Build the transcription definition for a single audio file using the base model.

    This function only creates the definition object; it does not submit anything
    to the service. The display name, description and locale are taken from the
    module-level constants NAME, DESCRIPTION and LOCALE.

    :param uri: URI of the audio file to transcribe. It is passed to the service
        as the only entry of `content_urls`.
    :param properties: transcription settings, forwarded unchanged.
    :return: the `cris_client.Transcription` definition to be submitted.
    """
    # No `model` is given, so the definition carries no custom model reference.
    return cris_client.Transcription(
        display_name=NAME,
        description=DESCRIPTION,
        locale=LOCALE,
        content_urls=[uri],
        properties=properties,
    )
47+
48+
49+
def transcribe_with_custom_model(api, uri, properties):
    """
    Build the transcription definition for a single audio file using a custom model.

    The custom model is looked up through `api.get_model` with the id stored in the
    module-level constant MODEL_REFERENCE. This function only creates the definition
    object; it does not submit the transcription.

    :param api: API client instance; only its `get_model` method is used here.
    :param uri: URI of the audio file to transcribe. It is passed to the service
        as the only entry of `content_urls`.
    :param properties: transcription settings, forwarded unchanged.
    :return: the `cris_client.Transcription` definition to be submitted.
    :raises SystemExit: with exit status 1 if MODEL_REFERENCE is not set.
    """
    # Model information (MODEL_REFERENCE) must be set at the top of this file.
    if MODEL_REFERENCE is None:
        logging.error("Custom model ids must be set when using custom models")
        # Exit with a non-zero status so that the failure is visible to the caller
        # of the script; a bare sys.exit() would report success.
        sys.exit(1)

    model = api.get_model(MODEL_REFERENCE)

    return cris_client.Transcription(
        display_name=NAME,
        description=DESCRIPTION,
        locale=LOCALE,
        content_urls=[uri],
        model=model,
        properties=properties,
    )
71+
72+
73+
def transcribe_from_container(uri, properties):
    """
    Build the transcription definition for all files in a container using the base model.

    This function only creates the definition object; it does not submit anything
    to the service. The display name, description and locale are taken from the
    module-level constants NAME, DESCRIPTION and LOCALE.

    :param uri: URI of the container holding the audio files. It is passed to the
        service as `content_container_url`.
    :param properties: transcription settings, forwarded unchanged.
    :return: the `cris_client.Transcription` definition to be submitted.
    """
    # No `model` is given, so the definition carries no custom model reference.
    return cris_client.Transcription(
        display_name=NAME,
        description=DESCRIPTION,
        locale=LOCALE,
        content_container_url=uri,
        properties=properties,
    )
87+
88+
89+
def _paginate(api, paginated_object):
90+
"""
91+
The autogenerated client does not support pagination. This function returns a generator over
92+
all items of the array that the paginated object `paginated_object` is part of.
93+
"""
94+
yield from paginated_object.values
95+
typename = type(paginated_object).__name__
96+
auth_settings = ["apiKeyHeader", "apiKeyQuery"]
97+
while paginated_object.next_link:
98+
link = paginated_object.next_link[len(api.api_client.configuration.host):]
99+
paginated_object, status, headers = api.api_client.call_api(link, "GET",
100+
response_type=typename, auth_settings=auth_settings)
101+
102+
if status == 200:
103+
yield from paginated_object.values
104+
else:
105+
raise Exception(f"could not receive paginated data: status {status}")
106+
107+
108+
def delete_all_transcriptions(api):
    """
    Delete all transcriptions associated with your speech resource.

    Every transcription returned by `api.get_transcriptions()` (including all
    follow-up pages) is deleted one by one. A transcription whose deletion raises
    an `ApiException` is logged and skipped, so one failure does not stop the
    remaining deletions.

    :param api: API client instance used to list and delete the transcriptions.
    """
    logging.info("Deleting all existing completed transcriptions.")

    # Get all transcriptions for the subscription. The complete list is collected
    # before the first deletion, so the listing is finished before anything changes.
    transcriptions = list(_paginate(api, api.get_transcriptions()))

    # Delete all pre-existing completed transcriptions.
    # If transcriptions are still running or not started, they will not be deleted.
    for transcription in transcriptions:
        # The id is the last path segment of the transcription's own URI.
        transcription_id = transcription._self.split('/')[-1]
        logging.debug(f"Deleting transcription with id {transcription_id}")
        try:
            api.delete_transcription(transcription_id)
        except cris_client.rest.ApiException as exc:
            logging.error(f"Could not delete transcription {transcription_id}: {exc}")
31126

32127

33128
def transcribe():
    """
    Create a transcription, wait for it to finish and log its results.

    The transcription definition is built from the module-level settings, submitted
    to the service, and then polled every 5 seconds until its status is either
    "Succeeded" or "Failed". On success, the content of every result file of kind
    "Transcription" is downloaded and logged; on failure, the error message
    reported by the service is logged.
    """
    logging.info("Starting transcription client...")

    # configure API key authorization: subscription_key
    configuration = cris_client.Configuration()
    configuration.api_key["Ocp-Apim-Subscription-Key"] = SUBSCRIPTION_KEY
    configuration.host = f"https://{SERVICE_REGION}.api.cognitive.microsoft.com/speechtotext/v3.0"

    # create the client object and authenticate
    client = cris_client.ApiClient(configuration)

    # create an instance of the transcription api class
    api = cris_client.DefaultApi(api_client=client)

    # Specify transcription properties by passing a dict to the properties parameter. See
    # https://docs.microsoft.com/azure/cognitive-services/speech-service/batch-transcription#configuration-properties
    # for supported parameters.
    properties = {
        # "punctuationMode": "DictatedAndAutomatic",
        # "profanityFilterMode": "Masked",
        # "wordLevelTimestampsEnabled": True,
        # "diarizationEnabled": True,
        # "destinationContainerUrl": "<results container>",
        # "timeToLive": "PT1H"
    }

    # Use base models for transcription. Comment this block if you are using a custom model.
    transcription_definition = transcribe_from_single_blob(RECORDINGS_BLOB_URI, properties)

    # Uncomment this block to use custom models for transcription.
    # transcription_definition = transcribe_with_custom_model(api, RECORDINGS_BLOB_URI, properties)

    # Uncomment this block to transcribe all files from a container.
    # transcription_definition = transcribe_from_container(RECORDINGS_CONTAINER_URI, properties)

    # Only the response headers are needed: they carry the location of the new transcription.
    _, _, headers = api.create_transcription_with_http_info(transcription=transcription_definition)

    # get the transcription Id from the location URI
    transcription_id = headers["location"].split("/")[-1]

    # Log information about the created transcription. If you should ask for support, please
    # include this information.
    logging.info(f"Created new transcription with id '{transcription_id}' in region {SERVICE_REGION}")

    logging.info("Checking status.")

    completed = False

    while not completed:
        # wait for 5 seconds before refreshing the transcription status
        time.sleep(5)

        transcription = api.get_transcription(transcription_id)
        logging.info(f"Transcriptions status: {transcription.status}")

        if transcription.status in ("Failed", "Succeeded"):
            completed = True

        if transcription.status == "Succeeded":
            pag_files = api.get_transcription_files(transcription_id)
            for file_data in _paginate(api, pag_files):
                # Only files of kind "Transcription" are downloaded and logged.
                if file_data.kind != "Transcription":
                    continue

                audiofilename = file_data.name
                results_url = file_data.links.content_url
                # A timeout keeps the sample from hanging forever on a stalled download.
                results = requests.get(results_url, timeout=60)
                logging.info(f"Results for {audiofilename}:\n{results.content.decode('utf-8')}")
        elif transcription.status == "Failed":
            logging.error(f"Transcription failed: {transcription.properties.error.message}")
139198

140199

141200
# Run the sample only when this file is executed as a script, not when it is imported.
if __name__ == "__main__":
    transcribe()
202+

0 commit comments

Comments
 (0)