# Copyright (c) Microsoft. All rights reserved.
# Licensed under the MIT license. See LICENSE.md file in the project root for full license information.

-from typing import List
-
import logging
import sys
import requests
import time
import swagger_client as cris_client

-
-logging.basicConfig(stream=sys.stdout, level=logging.DEBUG, format="%(message)s")
+logging.basicConfig(stream=sys.stdout, level=logging.DEBUG,
+                    format="%(asctime)s %(message)s", datefmt="%m/%d/%Y %I:%M:%S %p %Z")

# Your subscription key and region for the speech service
SUBSCRIPTION_KEY = "YourSubscriptionKey"

LOCALE = "en-US"
RECORDINGS_BLOB_URI = "<Your SAS Uri to the recording>"

-# Set subscription information when doing transcription with custom models
-ADAPTED_ACOUSTIC_ID = None  # guid of a custom acoustic model
-ADAPTED_LANGUAGE_ID = None  # guid of a custom language model
+# Provide the URI of a container with audio files to transcribe all of them with a single request
+RECORDINGS_CONTAINER_URI = "<Your SAS Uri to a container of audio files>"
+
+# Set model information when doing transcription with custom models
+MODEL_REFERENCE = None  # guid of a custom model
+
+
+def transcribe_from_single_blob(uri, properties):
+    """
+    Transcribe a single audio file located at `uri` using the settings specified in `properties`
+    using the base model for the specified locale.
+    """
+    transcription_definition = cris_client.Transcription(
+        display_name=NAME,
+        description=DESCRIPTION,
+        locale=LOCALE,
+        content_urls=[uri],
+        properties=properties
+    )
+
+    return transcription_definition
+
+
+def transcribe_with_custom_model(api, uri, properties):
+    """
+    Transcribe a single audio file located at `uri` using the settings specified in `properties`
+    using the custom model identified by `MODEL_REFERENCE`.
+    """
+    # Model information (MODEL_REFERENCE) must be set above.
+    if MODEL_REFERENCE is None:
+        logging.error("Custom model id must be set when using a custom model")
+        sys.exit()
+
+    model = api.get_model(MODEL_REFERENCE)
+
+    transcription_definition = cris_client.Transcription(
+        display_name=NAME,
+        description=DESCRIPTION,
+        locale=LOCALE,
+        content_urls=[uri],
+        model=model,
+        properties=properties
+    )
+
+    return transcription_definition
+
+
+def transcribe_from_container(uri, properties):
+    """
+    Transcribe all files in the container located at `uri` using the settings specified in `properties`
+    using the base model for the specified locale.
+    """
+    transcription_definition = cris_client.Transcription(
+        display_name=NAME,
+        description=DESCRIPTION,
+        locale=LOCALE,
+        content_container_url=uri,
+        properties=properties
+    )
+
+    return transcription_definition
+
+
+def _paginate(api, paginated_object):
+    """
+    The autogenerated client does not support pagination. This function returns a generator over
+    all items of the array that the paginated object `paginated_object` is part of.
+    """
+    yield from paginated_object.values
+    typename = type(paginated_object).__name__
+    auth_settings = ["apiKeyHeader", "apiKeyQuery"]
+    while paginated_object.next_link:
+        link = paginated_object.next_link[len(api.api_client.configuration.host):]
+        paginated_object, status, headers = api.api_client.call_api(link, "GET",
+            response_type=typename, auth_settings=auth_settings)
+
+        if status == 200:
+            yield from paginated_object.values
+        else:
+            raise Exception(f"could not receive paginated data: status {status}")
+
+
+def delete_all_transcriptions(api):
+    """
+    Delete all transcriptions associated with your speech resource.
+    """
+    logging.info("Deleting all existing completed transcriptions.")
+
+    # get all transcriptions for the subscription
+    transcriptions = list(_paginate(api, api.get_transcriptions()))
+
+    # Delete all pre-existing completed transcriptions.
+    # If transcriptions are still running or not started, they will not be deleted.
+    for transcription in transcriptions:
+        transcription_id = transcription._self.split('/')[-1]
+        logging.debug(f"Deleting transcription with id {transcription_id}")
+        try:
+            api.delete_transcription(transcription_id)
+        except cris_client.rest.ApiException as exc:
+            logging.error(f"Could not delete transcription {transcription_id}: {exc}")
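# Note: this helper is not invoked by transcribe() below; call it yourself if you want to
# clean up transcriptions left over from earlier runs.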


def transcribe():
    logging.info("Starting transcription client...")

    # configure API key authorization: subscription_key
    configuration = cris_client.Configuration()
-    configuration.api_key['Ocp-Apim-Subscription-Key'] = SUBSCRIPTION_KEY
-    configuration.host = "https://{}.cris.ai".format(SERVICE_REGION)
+    configuration.api_key["Ocp-Apim-Subscription-Key"] = SUBSCRIPTION_KEY
+    configuration.host = f"https://{SERVICE_REGION}.api.cognitive.microsoft.com/speechtotext/v3.0"
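    # The v3.0 batch transcription API is served from the regional Cognitive Services host
    # configured above, replacing the older <region>.cris.ai endpoint.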

    # create the client object and authenticate
    client = cris_client.ApiClient(configuration)

    # create an instance of the transcription api class
-    transcription_api = cris_client.CustomSpeechTranscriptionsApi(api_client=client)
-
-    # get all transcriptions for the subscription
-    transcriptions: List[cris_client.Transcription] = transcription_api.get_transcriptions()
-
-    logging.info("Deleting all existing completed transcriptions.")
-
-    # delete all pre-existing completed transcriptions
-    # if transcriptions are still running or not started, they will not be deleted
-    for transcription in transcriptions:
-        try:
-            transcription_api.delete_transcription(transcription.id)
-        except ValueError:
-            # ignore swagger error on empty response message body: https://github.com/swagger-api/swagger-core/issues/2446
-            pass
+    api = cris_client.DefaultApi(api_client=client)
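    # With the v3 swagger client, every operation used in this sample (models, transcriptions,
    # files) is exposed on DefaultApi rather than on per-resource classes such as
    # CustomSpeechTranscriptionsApi.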

    # Specify transcription properties by passing a dict to the properties parameter. See
    # https://docs.microsoft.com/azure/cognitive-services/speech-service/batch-transcription#configuration-properties
    # for supported parameters.
    properties = {
-        # 'PunctuationMode': 'DictatedAndAutomatic',
-        # 'ProfanityFilterMode': 'Masked',
-        # 'AddWordLevelTimestamps': 'False',
-        # 'AddDiarization': 'False',
-        # 'AddSentiment': False,
-        # 'TranscriptionResultsContainerUrl': "<results container>"
+        # "punctuationMode": "DictatedAndAutomatic",
+        # "profanityFilterMode": "Masked",
+        # "wordLevelTimestampsEnabled": True,
+        # "diarizationEnabled": True,
+        # "destinationContainerUrl": "<results container>",
+        # "timeToLive": "PT1H"
    }
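    # "timeToLive" is an ISO 8601 duration; "PT1H" asks the service to remove the finished
    # transcription after one hour. "destinationContainerUrl" should point to a writable
    # container SAS URL if you want the result files written to your own storage.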

    # Use base models for transcription. Comment this block if you are using a custom model.
-    transcription_definition = cris_client.TranscriptionDefinition(
-        name=NAME, description=DESCRIPTION, locale=LOCALE, recordings_url=RECORDINGS_BLOB_URI,
-        properties=properties
-    )
+    transcription_definition = transcribe_from_single_blob(RECORDINGS_BLOB_URI, properties)

    # Uncomment this block to use custom models for transcription.
-    # Model information (ADAPTED_ACOUSTIC_ID and ADAPTED_LANGUAGE_ID) must be set above.
-    # if ADAPTED_ACOUSTIC_ID is None or ADAPTED_LANGUAGE_ID is None:
-    #     logging.info("Custom model ids must be set to when using custom models")
-    # transcription_definition = cris_client.TranscriptionDefinition(
-    #     name=NAME, description=DESCRIPTION, locale=LOCALE, recordings_url=RECORDINGS_BLOB_URI,
-    #     models=[cris_client.ModelIdentity(ADAPTED_ACOUSTIC_ID), cris_client.ModelIdentity(ADAPTED_LANGUAGE_ID)],
-    #     properties=properties
-    # )
+    # transcription_definition = transcribe_with_custom_model(api, RECORDINGS_BLOB_URI, properties)

-    data, status, headers = transcription_api.create_transcription_with_http_info(transcription_definition)
+    # Uncomment this block to transcribe all files from a container.
+    # transcription_definition = transcribe_from_container(RECORDINGS_CONTAINER_URI, properties)

-    # extract transcription location from the headers
-    transcription_location: str = headers["location"]
+    created_transcription, status, headers = api.create_transcription_with_http_info(transcription=transcription_definition)

    # get the transcription Id from the location URI
-    created_transcription: str = transcription_location.split('/')[-1]
+    transcription_id = headers["location"].split("/")[-1]

-    logging.info("Created new transcription with id {}".format(created_transcription))
+    # Log information about the created transcription. If you need to contact support, please
+    # include this information.
+    logging.info(f"Created new transcription with id '{transcription_id}' in region {SERVICE_REGION}")

    logging.info("Checking status.")

    completed = False

    while not completed:
-        running, not_started = 0, 0
+        # wait for 5 seconds before refreshing the transcription status
+        time.sleep(5)

-        # get all transcriptions for the user
-        transcriptions: List[cris_client.Transcription] = transcription_api.get_transcriptions()
+        transcription = api.get_transcription(transcription_id)
+        logging.info(f"Transcription status: {transcription.status}")

-        # for each transcription in the list we check the status
-        for transcription in transcriptions:
-            if transcription.status in ("Failed", "Succeeded"):
-                # we check to see if it was the transcription we created from this client
-                if created_transcription != transcription.id:
-                    continue
+        if transcription.status in ("Failed", "Succeeded"):
+            completed = True

-                completed = True
-
-                if transcription.status == "Succeeded":
-                    results_uri = transcription.results_urls["channel_0"]
-                    results = requests.get(results_uri)
-                    logging.info("Transcription succeeded. Results: ")
-                    logging.info(results.content.decode("utf-8"))
-                else:
-                    logging.info("Transcription failed :{}.".format(transcription.status_message))
-                    break
-            elif transcription.status == "Running":
-                running += 1
-            elif transcription.status == "NotStarted":
-                not_started += 1
-
-        logging.info("Transcriptions status: "
-                     "completed (this transcription): {}, {} running, {} not started yet".format(
-                         completed, running, not_started))
-
-        # wait for 5 seconds
-        time.sleep(5)
+        if transcription.status == "Succeeded":
+            pag_files = api.get_transcription_files(transcription_id)
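            # Files of kind "Transcription" hold the recognition results for one audio file
            # as JSON; their links.content_url should be a pre-authorized URL, so a plain GET
            # below is enough to download the content.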
+            for file_data in _paginate(api, pag_files):
+                if file_data.kind != "Transcription":
+                    continue

-    input("Press any key...")
+                audiofilename = file_data.name
+                results_url = file_data.links.content_url
+                results = requests.get(results_url)
+                logging.info(f"Results for {audiofilename}:\n{results.content.decode('utf-8')}")
+        elif transcription.status == "Failed":
+            logging.info(f"Transcription failed: {transcription.properties.error.message}")


if __name__ == "__main__":
    transcribe()
+