diff --git a/samples/ingestion/ingestion-client/.devcontainer/devcontainer.json b/samples/ingestion/ingestion-client/.devcontainer/devcontainer.json new file mode 100644 index 000000000..b434ec530 --- /dev/null +++ b/samples/ingestion/ingestion-client/.devcontainer/devcontainer.json @@ -0,0 +1,29 @@ +{ + "name": "Ingestion Client", + "image": "mcr.microsoft.com/devcontainers/dotnet:1-8.0", + "features": { + "ghcr.io/devcontainers/features/azure-cli:1": {}, + "ghcr.io/devcontainers/features/node:1": {}, + "ghcr.io/jlaundry/devcontainer-features/azure-functions-core-tools:1": {}, + "ghcr.io/devcontainers-contrib/features/pre-commit:2": {}, + "ghcr.io/rchaganti/vsc-devcontainer-features/azurebicep:1.0.5": {} + }, + "postCreateCommand": "pre-commit install", + "customizations": { + "vscode": { + "extensions": [ + "github.vscode-pull-request-github", + "ms-dotnettools.csdevkit", + "ms-vscode.azure-account", + "ms-azuretools.vscode-azurefunctions", + "editorconfig.editorconfig", + "ms-azuretools.vscode-bicep", + "ms-dotnettools.vscode-dotnet-runtime" + ], + "settings": { + "omnisharp.enableRoslynAnalyzers": true, + "omnisharp.enableEditorConfigSupport": true + } + } + } +} diff --git a/samples/ingestion/ingestion-client/.github/dependabot.yml b/samples/ingestion/ingestion-client/.github/dependabot.yml new file mode 100644 index 000000000..f33a02cd1 --- /dev/null +++ b/samples/ingestion/ingestion-client/.github/dependabot.yml @@ -0,0 +1,12 @@ +# To get started with Dependabot version updates, you'll need to specify which +# package ecosystems to update and where the package manifests are located. +# Please see the documentation for more information: +# https://docs.github.com/github/administering-a-repository/configuration-options-for-dependency-updates +# https://containers.dev/guide/dependabot + +version: 2 +updates: + - package-ecosystem: "devcontainers" + directory: "/" + schedule: + interval: weekly diff --git a/samples/ingestion/ingestion-client/.gitignore b/samples/ingestion/ingestion-client/.gitignore new file mode 100644 index 000000000..1927772bc --- /dev/null +++ b/samples/ingestion/ingestion-client/.gitignore @@ -0,0 +1 @@ +local.settings.json \ No newline at end of file diff --git a/samples/ingestion/ingestion-client/Connector/BatchClient.cs b/samples/ingestion/ingestion-client/Connector/BatchClient.cs index 88a91b378..c5791a3e6 100644 --- a/samples/ingestion/ingestion-client/Connector/BatchClient.cs +++ b/samples/ingestion/ingestion-client/Connector/BatchClient.cs @@ -21,7 +21,7 @@ public static class BatchClient { private const string TranscriptionsBasePath = "speechtotext/v3.0/Transcriptions/"; - private const int MaxNumberOfRetries = 3; + private const int MaxNumberOfRetries = 10; private static readonly TimeSpan PostTimeout = TimeSpan.FromMinutes(1); @@ -34,7 +34,7 @@ public static class BatchClient private static readonly AsyncRetryPolicy RetryPolicy = Policy .Handle(e => e is HttpStatusCodeException || e is HttpRequestException) - .WaitAndRetryAsync(MaxNumberOfRetries, retryAttempt => TimeSpan.FromSeconds(2)); + .WaitAndRetryAsync(MaxNumberOfRetries, retryAttempt => TimeSpan.FromSeconds(5)); public static Task GetTranscriptionReportFileFromSasAsync(string sasUri) { diff --git a/samples/ingestion/ingestion-client/Connector/Enums/TranscriptionAnalyticsJobStatus.cs b/samples/ingestion/ingestion-client/Connector/Enums/TranscriptionAnalyticsJobStatus.cs new file mode 100644 index 000000000..9e8fc659c --- /dev/null +++ b/samples/ingestion/ingestion-client/Connector/Enums/TranscriptionAnalyticsJobStatus.cs @@ -0,0 +1,14 @@ +// +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT license. See LICENSE.md file in the project root for full license information. +// + +namespace Connector.Enums +{ + public enum TranscriptionAnalyticsJobStatus + { + NotSubmitted, + Running, + Completed + } +} diff --git a/samples/ingestion/ingestion-client/Connector/Serializable/TextAnalytics/TextAnalyticsRequest.cs b/samples/ingestion/ingestion-client/Connector/Serializable/TextAnalytics/TextAnalyticsRequest.cs deleted file mode 100644 index 5870f3919..000000000 --- a/samples/ingestion/ingestion-client/Connector/Serializable/TextAnalytics/TextAnalyticsRequest.cs +++ /dev/null @@ -1,23 +0,0 @@ -// -// Copyright (c) Microsoft Corporation. All rights reserved. -// Licensed under the MIT license. See LICENSE.md file in the project root for full license information. -// - -namespace Connector.Serializable -{ - public class TextAnalyticsRequest - { - public TextAnalyticsRequest(string language, string id, string text) - { - this.Language = language; - this.Id = id; - this.Text = text; - } - - public string Language { get; private set; } - - public string Id { get; private set; } - - public string Text { get; private set; } - } -} diff --git a/samples/ingestion/ingestion-client/Connector/Serializable/TextAnalytics/TextAnalyticsRequestsChunk.cs b/samples/ingestion/ingestion-client/Connector/Serializable/TextAnalytics/TextAnalyticsRequestsChunk.cs deleted file mode 100644 index 8f6fda501..000000000 --- a/samples/ingestion/ingestion-client/Connector/Serializable/TextAnalytics/TextAnalyticsRequestsChunk.cs +++ /dev/null @@ -1,19 +0,0 @@ -// -// Copyright (c) Microsoft Corporation. All rights reserved. -// Licensed under the MIT license. See LICENSE.md file in the project root for full license information. -// - -namespace Connector.Serializable -{ - using System.Collections.Generic; - - public class TextAnalyticsRequestsChunk - { - public TextAnalyticsRequestsChunk(IEnumerable documents) - { - this.Documents = documents; - } - - public IEnumerable Documents { get; private set; } - } -} diff --git a/samples/ingestion/ingestion-client/Connector/Serializable/TranscriptionStartedMessage/TextAnalyticsRequests.cs b/samples/ingestion/ingestion-client/Connector/Serializable/TranscriptionStartedMessage/TextAnalyticsRequests.cs index fe0cfb021..1f2dcb839 100644 --- a/samples/ingestion/ingestion-client/Connector/Serializable/TranscriptionStartedMessage/TextAnalyticsRequests.cs +++ b/samples/ingestion/ingestion-client/Connector/Serializable/TranscriptionStartedMessage/TextAnalyticsRequests.cs @@ -14,15 +14,15 @@ public TextAnalyticsRequests( IEnumerable audioLevelRequests, IEnumerable conversationRequests) { - this.UtteranceLevelRequests = utteranceLevelRequests; - this.AudioLevelRequests = audioLevelRequests; - this.ConversationRequests = conversationRequests; + this.UtteranceLevelRequests = utteranceLevelRequests ?? new List(); + this.AudioLevelRequests = audioLevelRequests ?? new List(); + this.ConversationRequests = conversationRequests ?? new List(); } - public IEnumerable UtteranceLevelRequests { get; } + public IEnumerable UtteranceLevelRequests { get; set; } - public IEnumerable AudioLevelRequests { get; } + public IEnumerable AudioLevelRequests { get; set; } - public IEnumerable ConversationRequests { get; } + public IEnumerable ConversationRequests { get; set; } } } diff --git a/samples/ingestion/ingestion-client/Connector/StorageConnector.cs b/samples/ingestion/ingestion-client/Connector/StorageConnector.cs index e410da79e..7f6bc679d 100644 --- a/samples/ingestion/ingestion-client/Connector/StorageConnector.cs +++ b/samples/ingestion/ingestion-client/Connector/StorageConnector.cs @@ -121,7 +121,7 @@ public async Task MoveFileAsync(string inputContainerName, string inputFileName, if (!keepSource) { - await inputBlockBlobClient.DeleteAsync().ConfigureAwait(false); + await inputBlockBlobClient.DeleteIfExistsAsync().ConfigureAwait(false); } return; @@ -131,7 +131,7 @@ public async Task MoveFileAsync(string inputContainerName, string inputFileName, if (!keepSource) { - await inputBlockBlobClient.DeleteAsync().ConfigureAwait(false); + await inputBlockBlobClient.DeleteIfExistsAsync().ConfigureAwait(false); } } } diff --git a/samples/ingestion/ingestion-client/FetchTranscription/FetchTranscription.cs b/samples/ingestion/ingestion-client/FetchTranscription/FetchTranscription.cs index b86ed6364..b48112db2 100644 --- a/samples/ingestion/ingestion-client/FetchTranscription/FetchTranscription.cs +++ b/samples/ingestion/ingestion-client/FetchTranscription/FetchTranscription.cs @@ -3,7 +3,7 @@ // Licensed under the MIT license. See LICENSE.md file in the project root for full license information. // -namespace FetchTranscriptionFunction +namespace FetchTranscription { using System; using System.Threading.Tasks; diff --git a/samples/ingestion/ingestion-client/FetchTranscription/README.md b/samples/ingestion/ingestion-client/FetchTranscription/README.md new file mode 100644 index 000000000..43bb87bfb --- /dev/null +++ b/samples/ingestion/ingestion-client/FetchTranscription/README.md @@ -0,0 +1,25 @@ +# Local development instructions + +Pre-requisites: +1. Please follow the instructions on [the main guide](./../infra/guide.md#ingestion-client-setup-instructions) to deploy the Ingestion Client and associated ecosystem to Azure. +2. Reopen the project within a [devcontainer](https://containers.dev/overview). (The devcontainer settings at the root of the project have the tools needed to develop and run this function locally) + +To run the FetchTranscription function locally, do the following: + +1. Run the following command to fetch your Azure Function app settings and save them to local.settings.json: + +``` +func azure functionapp fetch-app-settings +``` + +Note: Replace `` with the actual name of your function app that you can get from the Azure Portal. It will look like `FetchTranscriptionFunction-20240531T092901Z`. + +2. In the local.settings.json file generated by the previous step, replace the value of the `AzureSpeechServicesKey` with the actual key for your Azure Speech Service instance. You can get this from the Azure portal. (If you're using the SQL database or the Text Analytics, do the same for these keys as well) + +3. Navigate to the FetchTranscription function running on your Azure via the portal, and click on Stop. You need to do this so that you don't have two instances of the FetchTranscription function running and listening to the same events when you start the function from your local machine in the next step. + +4. Run the following command to start the local function (this will apply your local code changes): + +``` +func start +``` \ No newline at end of file diff --git a/samples/ingestion/ingestion-client/FetchTranscription/TranscriptionAnalytics/ITranscriptionAnalyticsProvider.cs b/samples/ingestion/ingestion-client/FetchTranscription/TranscriptionAnalytics/ITranscriptionAnalyticsProvider.cs new file mode 100644 index 000000000..3eeb6bfd6 --- /dev/null +++ b/samples/ingestion/ingestion-client/FetchTranscription/TranscriptionAnalytics/ITranscriptionAnalyticsProvider.cs @@ -0,0 +1,38 @@ +// +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT license. See LICENSE.md file in the project root for full license information. +// + +namespace FetchTranscription +{ + using System.Collections.Generic; + using System.Threading.Tasks; + + using Connector; + using Connector.Enums; + using Connector.Serializable.TranscriptionStartedServiceBusMessage; + + public interface ITranscriptionAnalyticsProvider + { + /// + /// Gets the status of the transcription analytics jobs that are monitored by the provider + /// + /// The audio file infos with transcription analytics jobs info + /// The overall status of all jobs monitored by the provider + Task GetTranscriptionAnalyticsJobStatusAsync(IEnumerable audioFileInfos); + + /// + /// Submits transcription analytics jobs based on the transcript in speechtranscript and sets the job ids in the corresponding audio file infos. + /// + /// The mapping from audio file info to transcript + /// The errors if any. + Task> SubmitTranscriptionAnalyticsJobsAsync(Dictionary speechTranscriptMappings); + + /// + /// Fetches the transcription analytics results and adds them to the corresponding speech transcript + /// + /// The mapping from audio file info to transcript + /// The errors if any. + Task> AddTranscriptionAnalyticsResultsToTranscriptsAsync(Dictionary speechTranscriptMappings); + } +} diff --git a/samples/ingestion/ingestion-client/FetchTranscription/Language/AnalyzeConversationsProvider.cs b/samples/ingestion/ingestion-client/FetchTranscription/TranscriptionAnalytics/Language/AnalyzeConversationsProvider.cs similarity index 88% rename from samples/ingestion/ingestion-client/FetchTranscription/Language/AnalyzeConversationsProvider.cs rename to samples/ingestion/ingestion-client/FetchTranscription/TranscriptionAnalytics/Language/AnalyzeConversationsProvider.cs index 091315d0b..bef69644a 100644 --- a/samples/ingestion/ingestion-client/FetchTranscription/Language/AnalyzeConversationsProvider.cs +++ b/samples/ingestion/ingestion-client/FetchTranscription/TranscriptionAnalytics/Language/AnalyzeConversationsProvider.cs @@ -3,7 +3,7 @@ // Licensed under the MIT license. See LICENSE.md file in the project root for full license information. // -namespace Language +namespace FetchTranscription { using System; using System.Collections.Generic; @@ -17,11 +17,11 @@ namespace Language using Connector; using Connector.Constants; + using Connector.Enums; using Connector.Serializable.Language.Conversations; + using Connector.Serializable.TextAnalytics; using Connector.Serializable.TranscriptionStartedServiceBusMessage; - using FetchTranscriptionFunction; - using Microsoft.Extensions.Logging; using Microsoft.Extensions.Options; @@ -32,7 +32,7 @@ namespace Language /// /// Analyze Conversations async client. /// - public class AnalyzeConversationsProvider + public class AnalyzeConversationsProvider : ITranscriptionAnalyticsProvider { private const string DefaultInferenceSource = "lexical"; private static readonly TimeSpan RequestTimeout = TimeSpan.FromMinutes(3); @@ -44,8 +44,7 @@ public class AnalyzeConversationsProvider public AnalyzeConversationsProvider(string locale, string subscriptionKey, string endpoint, ILogger log, IOptions appConfig) { - this.conversationAnalysisClient = new ConversationAnalysisClient(new Uri($"https://{region}.api.cognitive.microsoft.com"), new AzureKeyCredential(subscriptionKey)); - + this.conversationAnalysisClient = new ConversationAnalysisClient(new Uri(endpoint), new AzureKeyCredential(subscriptionKey)); this.locale = locale; this.log = log; this.appConfig = appConfig?.Value; @@ -208,7 +207,7 @@ private static IEnumerable GetAllErrorsFromResults((IEnumerable /// Instance of the speech transcript. /// An enumerable of the jobs IDs and errors if any. - public async Task<(IEnumerable jobIds, IEnumerable errors)> SubmitAnalyzeConversationsRequestAsync(SpeechTranscript speechTranscript) + private async Task<(IEnumerable jobIds, IEnumerable errors)> SubmitAnalyzeConversationsRequestAsync(SpeechTranscript speechTranscript) { speechTranscript = speechTranscript ?? throw new ArgumentNullException(nameof(speechTranscript)); var data = new List(); @@ -223,7 +222,7 @@ private static IEnumerable GetAllErrorsFromResults((IEnumerable /// Enumerable of conversational jobIds. /// Enumerable of results of conversation PII redaction and errors encountered if any. - public async Task<(AnalyzeConversationPiiResults piiResults, AnalyzeConversationSummarizationResults summarizationResults, IEnumerable errors)> GetConversationsOperationsResult(IEnumerable jobIds) + private async Task<(AnalyzeConversationPiiResults piiResults, AnalyzeConversationSummarizationResults summarizationResults, IEnumerable errors)> GetConversationsOperationsResult(IEnumerable jobIds) { var errors = new List(); if (!jobIds.Any()) @@ -231,10 +230,10 @@ private static IEnumerable GetAllErrorsFromResults((IEnumerable await this.GetConversationsOperationResults(jobId).ConfigureAwait(false)); + var tasks = jobIds.Select(this.GetConversationsOperationResults); var results = await Task.WhenAll(tasks).ConfigureAwait(false); - var resultsErrors = results.SelectMany(result => result.piiResults).SelectMany(s => s.Errors).Concat(results.SelectMany(result => result.summarizationResults).SelectMany(s => s.Errors)); + var resultsErrors = GetAllErrorsFromResults(results); if (resultsErrors.Any()) { errors.AddRange(resultsErrors.Select(s => $"Error thrown for conversation : {s.Id}")); @@ -291,50 +290,13 @@ private static IEnumerable GetAllErrorsFromResults((IEnumerable - /// Checks for all conversational analytics requests that were marked as running if they have completed and sets a new state accordingly. - /// - /// Enumerable for audioFiles. - /// True if all requests completed, else false. - public async Task ConversationalRequestsCompleted(IEnumerable audioFileInfos) - { - if (!(IsConversationalPiiEnabled() || IsConversationalSummarizationEnabled()) || !audioFileInfos.Where(audioFileInfo => audioFileInfo.TextAnalyticsRequests.ConversationRequests != null).Any()) - { - return true; - } - - var conversationRequests = audioFileInfos.SelectMany(audioFileInfo => audioFileInfo.TextAnalyticsRequests.ConversationRequests).Where(text => text.Status == TextAnalyticsRequestStatus.Running); - - var runningJobsCount = 0; - - foreach (var textAnalyticsJob in conversationRequests) - { - var response = await this.conversationAnalysisClient.GetAnalyzeConversationJobStatusAsync(Guid.Parse(textAnalyticsJob.Id)).ConfigureAwait(false); - - if (response.IsError) - { - continue; - } - - var analysisResult = JsonConvert.DeserializeObject(response.Content.ToString()); - - if (analysisResult.Tasks.InProgress != 0) - { - // some jobs are still running. - runningJobsCount++; - } - } - - return runningJobsCount == 0; - } - /// /// Gets the (audio-level) results from text analytics, adds the results to the speech transcript. /// /// The conversation analysis job Ids. /// The speech transcript object. /// The errors, if any. - public async Task> AddConversationalEntitiesAsync( + private async Task> AddConversationalEntitiesAsync( IEnumerable conversationJobIds, SpeechTranscript speechTranscript) { @@ -568,7 +530,7 @@ private void PreparePiiRequest(SpeechTranscript speechTranscript, List piiResults, IEnumerable summarizationResults, IEnumerable errors)> GetConversationsOperationResults(string jobId) { + var piiResults = new List(); + var summarizationResults = new List(); var errors = new List(); try { @@ -610,14 +574,14 @@ private void PreparePiiRequest(SpeechTranscript speechTranscript, List item.Kind == AnalyzeConversationsTaskResultKind.conversationalPIIResults) - .Select(s => s as ConversationPiiItem) - .Select(s => s.Results); - var summarizationResults = analysisResult.Tasks - .Items.Where(item => item.Kind == AnalyzeConversationsTaskResultKind.conversationalSummarizationResults) - .Select(s => s as ConversationSummarizationItem) - .Select(s => s.Results); + piiResults.AddRange(analysisResult.Tasks + .Items + .Where(item => item.Kind == AnalyzeConversationsTaskResultKind.conversationalPIIResults && (item as ConversationPiiItem)?.Results != null) + .Select(s => ((ConversationPiiItem)s).Results)); + summarizationResults.AddRange(analysisResult.Tasks + .Items + .Where(item => item.Kind == AnalyzeConversationsTaskResultKind.conversationalSummarizationResults && (item as ConversationSummarizationItem)?.Results != null) + .Select(s => ((ConversationSummarizationItem)s).Results)); return (piiResults, summarizationResults, errors); } } @@ -632,7 +596,7 @@ private void PreparePiiRequest(SpeechTranscript speechTranscript, List -namespace TextAnalytics +namespace FetchTranscription { using System; using System.Collections.Generic; @@ -18,35 +18,12 @@ namespace TextAnalytics using Connector.Enums; using Connector.Serializable.TranscriptionStartedServiceBusMessage; - using FetchTranscriptionFunction; - - using Language; - using Microsoft.Extensions.Logging; using Microsoft.Extensions.Options; using static Connector.Serializable.TranscriptionStartedServiceBusMessage.TextAnalyticsRequest; - /// - /// The text analytics provide. - /// - /// General overview of text analytics request processing: - /// - /// For a succeded transcription, check if transcription has text analytics job info. - /// if true: - /// Check if text analytics job terminated. - /// if true: - /// Add text analytics results to transcript, write transcript to storage. - /// if false: - /// Re-enqueue job, check again after X minutes. - /// if false: - /// Check if text analytics is requested - /// if true: - /// Add text analytics job info to transcription. Re-enqueue job, check again after X minutes. - /// if false: - /// Write transcript to storage. - /// - public class TextAnalyticsProvider + public class TextAnalyticsProvider : ITranscriptionAnalyticsProvider { private const int MaxRecordsPerRequest = 25; @@ -62,7 +39,7 @@ public class TextAnalyticsProvider public TextAnalyticsProvider(string locale, string subscriptionKey, string endpoint, ILogger log, IOptions appConfig) { - this.textAnalyticsClient = new TextAnalyticsClient(new Uri($"https://{region}.api.cognitive.microsoft.com"), new AzureKeyCredential(subscriptionKey)); + this.textAnalyticsClient = new TextAnalyticsClient(new Uri(endpoint), new AzureKeyCredential(subscriptionKey)); this.locale = locale; this.log = log; this.appConfig = appConfig?.Value; @@ -79,7 +56,12 @@ public async Task GetTranscriptionAnalyticsJobS { if (!this.IsTextAnalyticsRequested()) { - return true; + return TranscriptionAnalyticsJobStatus.Completed; + } + + if (!audioFileInfos.Where(audioFileInfo => audioFileInfo.TextAnalyticsRequests != null).Any()) + { + return TranscriptionAnalyticsJobStatus.NotSubmitted; } var runningTextAnalyticsRequests = new List(); @@ -96,8 +78,7 @@ public async Task GetTranscriptionAnalyticsJobS .SelectMany(audioFileInfo => audioFileInfo.TextAnalyticsRequests.UtteranceLevelRequests) .Where(text => text.Status == TextAnalyticsRequestStatus.Running)); - var textAnalyticsRequestCompleted = true; - + var status = TranscriptionAnalyticsJobStatus.Completed; foreach (var textAnalyticsJob in runningTextAnalyticsRequests) { var operation = new AnalyzeActionsOperation(textAnalyticsJob.Id, this.textAnalyticsClient); @@ -112,7 +93,8 @@ public async Task GetTranscriptionAnalyticsJobS } else { - textAnalyticsRequestCompleted = false; + // if one or more jobs are still running, report status as running: + status = TranscriptionAnalyticsJobStatus.Running; } } @@ -228,7 +210,7 @@ public async Task> AddTranscriptionAnalyticsResultsToTranscr /// The speech transcript object. /// The sentiment analysis setting. /// The job ids and errors, if any were found. - public async Task<(IEnumerable jobIds, IEnumerable errors)> SubmitUtteranceLevelRequests( + private async Task<(IEnumerable jobIds, IEnumerable errors)> SubmitUtteranceLevelRequests( SpeechTranscript speechTranscript, SentimentAnalysisSetting sentimentAnalysisSetting) { @@ -258,7 +240,7 @@ public async Task> AddTranscriptionAnalyticsResultsToTranscr /// The sentiment analysis setting. /// The PII redaction setting. /// The job ids and errors, if any were found. - public async Task<(IEnumerable jobIds, IEnumerable errors)> SubmitAudioLevelRequests( + private async Task<(IEnumerable jobIds, IEnumerable errors)> SubmitAudioLevelRequests( SpeechTranscript speechTranscript, SentimentAnalysisSetting sentimentAnalysisSetting, PiiRedactionSetting piiRedactionSetting) @@ -308,7 +290,7 @@ public async Task> AddTranscriptionAnalyticsResultsToTranscr /// The text analytics job ids. /// The speech transcript object. /// The errors, if any. - public async Task> AddUtteranceLevelEntitiesAsync( + private async Task> AddUtteranceLevelEntitiesAsync( IEnumerable jobIds, SpeechTranscript speechTranscript) { @@ -354,7 +336,7 @@ public async Task> AddUtteranceLevelEntitiesAsync( /// The text analytics job ids. /// The speech transcript object. /// The errors, if any. - public async Task> AddAudioLevelEntitiesAsync( + private async Task> AddAudioLevelEntitiesAsync( IEnumerable jobIds, SpeechTranscript speechTranscript) { diff --git a/samples/ingestion/ingestion-client/FetchTranscription/TranscriptionAnalytics/TranscriptionAnalyticsOrchestrator.cs b/samples/ingestion/ingestion-client/FetchTranscription/TranscriptionAnalytics/TranscriptionAnalyticsOrchestrator.cs new file mode 100644 index 000000000..78842df0d --- /dev/null +++ b/samples/ingestion/ingestion-client/FetchTranscription/TranscriptionAnalytics/TranscriptionAnalyticsOrchestrator.cs @@ -0,0 +1,109 @@ +// +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT license. See LICENSE.md file in the project root for full license information. +// + +namespace FetchTranscription +{ + using System; + using System.Collections.Generic; + using System.Threading.Tasks; + + using Connector; + using Connector.Enums; + using Connector.Serializable.TranscriptionStartedServiceBusMessage; + + using Microsoft.Extensions.Logging; + using Microsoft.Extensions.Options; + + public sealed class TranscriptionAnalyticsOrchestrator + { + private readonly List providers; + + private readonly AppConfig appConfig; + + public TranscriptionAnalyticsOrchestrator( + string locale, + ILogger logger, + IOptions appConfig) + { + this.appConfig = appConfig?.Value; + this.providers = new List(); + + if (!string.IsNullOrEmpty(this.appConfig.TextAnalyticsKey) && !string.IsNullOrEmpty(this.appConfig.TextAnalyticsEndpoint)) + { + this.providers.Add(new TextAnalyticsProvider(locale, this.appConfig.TextAnalyticsKey, this.appConfig.TextAnalyticsEndpoint, logger, Options.Create(this.appConfig))); + this.providers.Add(new AnalyzeConversationsProvider(locale, this.appConfig.TextAnalyticsKey, this.appConfig.TextAnalyticsEndpoint, logger, Options.Create(this.appConfig))); + } + } + + /// + /// Gets the merged status of all transcription analytics jobs. + /// + /// The transcription started service bus message. + /// The merged job status. + public async Task GetTranscriptionAnalyticsJobsStatusAsync(TranscriptionStartedMessage transcriptionStartedMessage) + { + _ = transcriptionStartedMessage ?? throw new ArgumentNullException(nameof(transcriptionStartedMessage)); + + foreach (var provider in this.providers) + { + var providerStatus = await provider.GetTranscriptionAnalyticsJobStatusAsync(transcriptionStartedMessage.AudioFileInfos).ConfigureAwait(false); + + // if any is not submitted, we can safely return here since we submit all requests at the same time - therefore all other providers should not have any running requests. + if (providerStatus == TranscriptionAnalyticsJobStatus.NotSubmitted) + { + return TranscriptionAnalyticsJobStatus.NotSubmitted; + } + + // if any is running, we set the status to running and fetch it again after some time. + if (providerStatus == TranscriptionAnalyticsJobStatus.Running) + { + return TranscriptionAnalyticsJobStatus.Running; + } + } + + return TranscriptionAnalyticsJobStatus.Completed; + } + + /// + /// Submit transcription analytics jobs and adds their IDs to the audio file infos, so that they can get fetched the next time the transcription job status is polled. + /// + /// The mapping from audio file infos to speech transcripts. + /// The errors if any. + public async Task> SubmitTranscriptionAnalyticsJobsAndAddToAudioFileInfos(Dictionary speechTranscriptMappings) + { + _ = speechTranscriptMappings ?? throw new ArgumentNullException(nameof(speechTranscriptMappings)); + + var errors = new List(); + + foreach (var provider in this.providers) + { + var providerErros = await provider.SubmitTranscriptionAnalyticsJobsAsync(speechTranscriptMappings).ConfigureAwait(false); + errors.AddRange(providerErros); + } + + return errors; + } + + /// + /// Adds the result of all transcription analytics jobs to the corresponding speech transcript. + /// + /// The mapping from audio file infos to speech transcripts. + /// The errors if any. + public async Task> AddTranscriptionAnalyticsResultsToTranscripts(Dictionary speechTranscriptMappings) + { + _ = speechTranscriptMappings ?? throw new ArgumentNullException(nameof(speechTranscriptMappings)); + + var errors = new List(); + + foreach (var provider in this.providers) + { + var providerErros = await provider.AddTranscriptionAnalyticsResultsToTranscriptsAsync(speechTranscriptMappings).ConfigureAwait(false); + errors.AddRange(providerErros); + } + + return errors; + } + } +} diff --git a/samples/ingestion/ingestion-client/FetchTranscription/TranscriptionProcessor.cs b/samples/ingestion/ingestion-client/FetchTranscription/TranscriptionProcessor.cs index c6aa28af6..faa4c7f10 100644 --- a/samples/ingestion/ingestion-client/FetchTranscription/TranscriptionProcessor.cs +++ b/samples/ingestion/ingestion-client/FetchTranscription/TranscriptionProcessor.cs @@ -3,7 +3,7 @@ // Licensed under the MIT license. See LICENSE.md file in the project root for full license information. // -namespace FetchTranscriptionFunction +namespace FetchTranscription { using System; using System.Collections.Generic; @@ -20,16 +20,12 @@ namespace FetchTranscriptionFunction using Connector.Enums; using Connector.Serializable.TranscriptionStartedServiceBusMessage; - using Language; - using Microsoft.EntityFrameworkCore; using Microsoft.Extensions.Azure; using Microsoft.Extensions.Logging; using Microsoft.Extensions.Options; using Newtonsoft.Json; - using TextAnalytics; - using static Connector.Serializable.TranscriptionStartedServiceBusMessage.TextAnalyticsRequest; public class TranscriptionProcessor { @@ -397,112 +393,21 @@ private async Task ProcessSucceededTranscriptionAsync(string transcriptionLocati } } - if (textAnalyticsProvider != null && - (FetchTranscriptionEnvironmentVariables.SentimentAnalysisSetting != SentimentAnalysisSetting.None - || FetchTranscriptionEnvironmentVariables.PiiRedactionSetting != PiiRedactionSetting.None - || AnalyzeConversationsProvider.IsConversationalPiiEnabled() - || AnalyzeConversationsProvider.IsConversationalSummarizationEnabled())) + if (transcriptionAnalyticsJobStatus == TranscriptionAnalyticsJobStatus.Completed) { - // If we already got text analytics requests in the transcript (containsTextAnalyticsRequest), add the results to the transcript. - // Otherwise, submit new text analytics requests. - if (containsTextAnalyticsRequest) - { - foreach (var speechTranscriptMapping in speechTranscriptMappings) - { - var speechTranscript = speechTranscriptMapping.Value; - var audioFileInfo = speechTranscriptMapping.Key; - var fileName = audioFileInfo.FileName; - if (FetchTranscriptionEnvironmentVariables.PiiRedactionSetting != PiiRedactionSetting.None) - { - speechTranscript.RecognizedPhrases.ToList().ForEach(phrase => - { - if (phrase.NBest != null && phrase.NBest.Any()) - { - var firstNBest = phrase.NBest.First(); - phrase.NBest = new[] { firstNBest }; - } - }); - } - - var textAnalyticsErrors = new List(); - - if (audioFileInfo.TextAnalyticsRequests.AudioLevelRequests?.Any() == true) - { - var audioLevelErrors = await textAnalyticsProvider.AddAudioLevelEntitiesAsync(audioFileInfo.TextAnalyticsRequests.AudioLevelRequests.Select(request => request.Id), speechTranscript).ConfigureAwait(false); - textAnalyticsErrors.AddRange(audioLevelErrors); - } - - if (audioFileInfo.TextAnalyticsRequests.UtteranceLevelRequests?.Any() == true) - { - var utteranceLevelErrors = await textAnalyticsProvider.AddUtteranceLevelEntitiesAsync(audioFileInfo.TextAnalyticsRequests.UtteranceLevelRequests.Select(request => request.Id), speechTranscript).ConfigureAwait(false); - textAnalyticsErrors.AddRange(utteranceLevelErrors); - } - - if (audioFileInfo.TextAnalyticsRequests.ConversationRequests?.Any() == true) - { - var conversationalAnalyticsErrors = await conversationsAnalysisProvider.AddConversationalEntitiesAsync(audioFileInfo.TextAnalyticsRequests.ConversationRequests.Select(request => request.Id), speechTranscript).ConfigureAwait(false); - textAnalyticsErrors.AddRange(conversationalAnalyticsErrors); - } - - if (textAnalyticsErrors.Any()) - { - var distinctErrors = textAnalyticsErrors.Distinct(); - var errorMessage = $"File {(string.IsNullOrEmpty(fileName) ? "unknown" : fileName)}:\n{string.Join('\n', distinctErrors)}"; + var errors = await transcriptionAnalyticsOrchestrator.AddTranscriptionAnalyticsResultsToTranscripts(speechTranscriptMappings).ConfigureAwait(false); - generalErrorsStringBuilder.AppendLine(errorMessage); - } - } + foreach (var error in errors) + { + generalErrorsStringBuilder.AppendLine(error); } - else + } + else if (transcriptionAnalyticsJobStatus == TranscriptionAnalyticsJobStatus.NotSubmitted) + { + var errors = await transcriptionAnalyticsOrchestrator.SubmitTranscriptionAnalyticsJobsAndAddToAudioFileInfos(speechTranscriptMappings).ConfigureAwait(false); + foreach (var error in errors) { - foreach (var speechTranscriptMapping in speechTranscriptMappings) - { - var speechTranscript = speechTranscriptMapping.Value; - var audioFileInfo = speechTranscriptMapping.Key; - - var fileName = audioFileInfo.FileName; - - if (speechTranscript.RecognizedPhrases != null && speechTranscript.RecognizedPhrases.All(phrase => phrase.RecognitionStatus.Equals("Success", StringComparison.Ordinal))) - { - var textAnalyticsErrors = new List(); - - (var utteranceLevelJobIds, var utteranceLevelErrors) = await textAnalyticsProvider.SubmitUtteranceLevelRequests( - speechTranscript, - FetchTranscriptionEnvironmentVariables.SentimentAnalysisSetting).ConfigureAwait(false); - - var utteranceLevelRequests = utteranceLevelJobIds?.Select(jobId => new TextAnalyticsRequest(jobId, TextAnalyticsRequestStatus.Running)); - textAnalyticsErrors.AddRange(utteranceLevelErrors); - - (var audioLevelJobIds, var audioLevelErrors) = await textAnalyticsProvider.SubmitAudioLevelRequests( - speechTranscript, - FetchTranscriptionEnvironmentVariables.SentimentAnalysisSetting, - FetchTranscriptionEnvironmentVariables.PiiRedactionSetting).ConfigureAwait(false); - - var audioLevelRequests = audioLevelJobIds?.Select(jobId => new TextAnalyticsRequest(jobId, TextAnalyticsRequestStatus.Running)); - textAnalyticsErrors.AddRange(audioLevelErrors); - - (var conversationJobIds, var conversationErrors) = await conversationsAnalysisProvider.SubmitAnalyzeConversationsRequestAsync(speechTranscript).ConfigureAwait(false); - - var conversationalRequests = conversationJobIds?.Select(jobId => new TextAnalyticsRequest(jobId, TextAnalyticsRequestStatus.Running)); - textAnalyticsErrors.AddRange(conversationErrors); - - audioFileInfo.TextAnalyticsRequests = new TextAnalyticsRequests(utteranceLevelRequests, audioLevelRequests, conversationalRequests); - - if (textAnalyticsErrors.Any()) - { - var distinctErrors = textAnalyticsErrors.Distinct(); - var errorMessage = $"File {(string.IsNullOrEmpty(fileName) ? "unknown" : fileName)}:\n{string.Join('\n', distinctErrors)}"; - - generalErrorsStringBuilder.AppendLine(errorMessage); - } - } - } - - log.LogInformation($"Added text analytics requests to service bus message - re-queueing message."); - - // Poll for first time with TA request after 1 minute - await ServiceBusUtilities.SendServiceBusMessageAsync(FetchServiceBusSender, serviceBusMessage.CreateMessageString(), log, TimeSpan.FromMinutes(1)).ConfigureAwait(false); - return; + generalErrorsStringBuilder.AppendLine(error); } var textAnalyticsSubmitErrors = generalErrorsStringBuilder.ToString(); diff --git a/samples/ingestion/ingestion-client/StartTranscriptionByServiceBus/README.md b/samples/ingestion/ingestion-client/StartTranscriptionByServiceBus/README.md new file mode 100644 index 000000000..aca51650e --- /dev/null +++ b/samples/ingestion/ingestion-client/StartTranscriptionByServiceBus/README.md @@ -0,0 +1,25 @@ +# Local development instructions + +Pre-requisites: +1. Please follow the instructions on [the main guide](./../infra/guide.md#ingestion-client-setup-instructions) to deploy the Ingestion Client and associated ecosystem to Azure. +2. Reopen the project within a [devcontainer](https://containers.dev/overview). (The devcontainer settings at the root of the project have the tools needed to develop and run this function locally) + +To run the StartTranscriptionByServiceBus function locally, do the following: + +1. Run the following command to fetch your Azure Function app settings and save them to local.settings.json: + +``` +func azure functionapp fetch-app-settings +``` + +Note: Replace `` with the actual name of your function app that you can get from the Azure Portal. It will look like `StartTranscriptionFunction-20240531T092901Z`. + +2. In the local.settings.json file generated by the previous step, replace the value of the `AzureSpeechServicesKey` with the actual key for your Azure Speech Service instance. You can get this from the Azure portal. + +3. Navigate to the StartTranscription function running on your Azure via the portal, and click on Stop. You need to do this so that you don't have two instances of the StartTranscription function running and listening to the same events when you start the function from your local machine in the next step. + +4. Run the following command to start the local function (this will apply your local code changes): + +``` +func start +``` \ No newline at end of file diff --git a/samples/ingestion/ingestion-client/StartTranscriptionByTimer/README.md b/samples/ingestion/ingestion-client/StartTranscriptionByTimer/README.md new file mode 100644 index 000000000..da5773518 --- /dev/null +++ b/samples/ingestion/ingestion-client/StartTranscriptionByTimer/README.md @@ -0,0 +1,25 @@ +# Local development instructions + +Pre-requisites: +1. Please follow the instructions on [the main guide](./../infra/guide.md#ingestion-client-setup-instructions) to deploy the Ingestion Client and associated ecosystem to Azure. +2. Reopen the project within a [devcontainer](https://containers.dev/overview). (The devcontainer settings at the root of the project have the tools needed to develop and run this function locally) + +To run the StartTranscriptionByTimer function locally, do the following: + +1. Run the following command to fetch your Azure Function app settings and save them to local.settings.json: + +``` +func azure functionapp fetch-app-settings +``` + +Note: Replace `` with the actual name of your function app that you can get from the Azure Portal. It will look like `StartTranscriptionFunction-20240531T092901Z`. + +2. In the local.settings.json file generated by the previous step, replace the value of the `AzureSpeechServicesKey` with the actual key for your Azure Speech Service instance. You can get this from the Azure portal. + +3. Navigate to the StartTranscription function running on your Azure via the portal, and click on Stop. You need to do this so that you don't have two instances of the StartTranscription function running and listening to the same events when you start the function from your local machine in the next step. + +4. Run the following command to start the local function (this will apply your local code changes): + +``` +func start +``` \ No newline at end of file diff --git a/samples/ingestion/ingestion-client/Tests/EndToEndTests.cs b/samples/ingestion/ingestion-client/Tests/EndToEndTests.cs index 555d94261..0ea366690 100644 --- a/samples/ingestion/ingestion-client/Tests/EndToEndTests.cs +++ b/samples/ingestion/ingestion-client/Tests/EndToEndTests.cs @@ -14,7 +14,7 @@ namespace Tests using Connector.Serializable.Language.Conversations; using Connector.Serializable.TranscriptionStartedServiceBusMessage; - using Language; + using FetchTranscription; using Microsoft.Extensions.Logging; using Microsoft.Extensions.Options; @@ -69,11 +69,9 @@ public async Task AnalyzeConversationTestAsync() var errors = await provider.SubmitTranscriptionAnalyticsJobsAsync(speechTranscriptMapping).ConfigureAwait(false); Console.WriteLine("Submit"); - Console.WriteLine(JsonConvert.SerializeObject(jobIds)); - Assert.AreEqual(0, jobIds.errors.Count()); - var req = jobIds.jobIds.Select(jobId => new AudioFileInfo(default, default, new TextAnalyticsRequests(default, default, new[] { new TextAnalyticsRequest(jobId, TextAnalyticsRequest.TextAnalyticsRequestStatus.Running) }))); + Assert.AreEqual(0, errors.Count()); - while (!await provider.ConversationalRequestsCompleted(req).ConfigureAwait(false)) + while ((await provider.GetTranscriptionAnalyticsJobStatusAsync(speechTranscriptMapping.Keys).ConfigureAwait(false)) == Connector.Enums.TranscriptionAnalyticsJobStatus.Running) { await Task.Delay(TimeSpan.FromSeconds(10)).ConfigureAwait(false); Console.WriteLine($"[{DateTime.Now}]jobs are running..."); @@ -81,7 +79,7 @@ public async Task AnalyzeConversationTestAsync() Console.WriteLine($"[{DateTime.Now}]jobs done."); - var err = await provider.AddConversationalEntitiesAsync(jobIds.jobIds, transcription); + var err = await provider.AddTranscriptionAnalyticsResultsToTranscriptsAsync(speechTranscriptMapping); Console.WriteLine($"annotation result: {JsonConvert.SerializeObject(transcription)}"); Assert.AreEqual(0, err.Count()); Assert.AreEqual(4, transcription.ConversationAnalyticsResults.AnalyzeConversationSummarizationResults.Conversations.First().Summaries.Count()); diff --git a/samples/ingestion/ingestion-client/infra/guide.md b/samples/ingestion/ingestion-client/infra/guide.md index 4ad4595c3..8f2c467ea 100644 --- a/samples/ingestion/ingestion-client/infra/guide.md +++ b/samples/ingestion/ingestion-client/infra/guide.md @@ -14,7 +14,7 @@ The following diagram shows the structure of this tool as defined by the ARM tem ![Architecture](./images/architecture.png) -When a file lands in a storage container, the Grid event indicates the completed upload of a file. The file is filtered and pushed to a Service bus topic. Code in Azure Functions triggered by a timer picks up the event and creates a transmission request using the Azure Speech services batch pipeline. When the transmission request is complete, an event is placed in another queue in the same service bus resource. A different Azure Function triggered by the completion event starts monitoring transcription completion status. When transcription completes, the Azure Function copies the transcript into the same container where the audio file was obtained. +When a file lands in a storage container, the Grid event indicates the completed upload of a file. The file is filtered and pushed to a Service bus topic. Code in Azure Functions triggered by a timer picks up the event and creates a transcription request using the Azure Speech services batch pipeline. When the transcription request is complete, an event is placed in another queue in the same service bus resource. A different Azure Function triggered by the completion event starts monitoring transcription completion status. When transcription completes, the Azure Function copies the transcript into the same container where the audio file was obtained. The rest of the features are applied on demand. By deploying additional resources through the ARM template, you can choose to apply analytics on the transcript, produce reports or redact. @@ -57,17 +57,7 @@ To deploy the required infrastructure, click the button below: [![Deploy to Azure](https://aka.ms/deploytoazurebutton)](https://portal.azure.com/#create/Microsoft.Template/uri/https%3A%2F%2Fraw.githubusercontent.com%2FAzure-Samples%2Fcognitive-services-speech-sdk%2Fmaster%2Fsamples%2Fingestion%2Fingestion-client%2Finfra%2Fmain.json) -4. Load the template by clicking **Load file**. Alternatively, -you could copy/paste the template in the editor. - -![Load template](./images/image007.png) - -5. Once the template text is loaded you will be able to read and edit the transcript. Do -**NOT** attempt any edits at this stage. You need to save the template you loaded, so click the **Save** button. - -![Save template](./images/image009.png) - -Saving the template will result in the screen below. You will need to fill in the form provided. It is +This will result in the screen below on your browser. You will need to fill in the form provided. It is important that all the information is correct. Let us look at the form and go through each field. ![form template](./images/image011.png)