From cee450af4b09d3a75867495fe185462bc712e9e3 Mon Sep 17 00:00:00 2001 From: Glenn Harper <64209257+glharper@users.noreply.github.com> Date: Thu, 9 Nov 2023 15:49:29 -0800 Subject: [PATCH] initial changes to allow sample js to work with ESM v1.33.1 (#2132) --- .../node/conversation-transcription/index.js | 110 ++++--- .../conversation-transcription/package.json | 3 +- quickstart/javascript/node/from-file/index.js | 7 +- .../javascript/node/from-file/package.json | 3 +- .../node/meeting-transcription/index.js | 163 +++++----- .../node/meeting-transcription/package.json | 3 +- .../independent-identification.js | 102 +++--- .../identification/package.json | 3 +- .../verification/dependent-verification.js | 107 +++---- .../verification/package.json | 3 +- .../javascript/node/text-to-speech/index.js | 101 +++--- .../node/text-to-speech/package.json | 3 +- samples/js/node/filePushStream.js | 152 +++++---- samples/js/node/index.js | 113 ++++--- samples/js/node/intent.js | 63 ++-- samples/js/node/micSTT.js | 8 +- samples/js/node/package.json | 3 +- samples/js/node/pronunciationAssessment.js | 84 +++-- ...onunciationAssessmentConfiguredWithJson.js | 86 +++--- .../node/pronunciationAssessmentContinue.js | 291 +++++++++--------- .../pronunciationAssessmentFromMicrophone.js | 172 +++++------ samples/js/node/settings.js | 37 +-- samples/js/node/speech.js | 176 +++++------ samples/js/node/synthesis.js | 146 +++++---- samples/js/node/translation.js | 100 +++--- 25 files changed, 975 insertions(+), 1064 deletions(-) diff --git a/quickstart/javascript/node/conversation-transcription/index.js b/quickstart/javascript/node/conversation-transcription/index.js index 75fe8509e..417b98ea6 100644 --- a/quickstart/javascript/node/conversation-transcription/index.js +++ b/quickstart/javascript/node/conversation-transcription/index.js @@ -1,60 +1,56 @@ // Copyright (c) Microsoft Corporation. All rights reserved. // Licensed under the MIT license. -(function() { - "use strict"; - - // pull in the required packages. - var sdk = require("microsoft-cognitiveservices-speech-sdk"); - var fs = require("fs"); - - // replace with your own subscription key, - // service region (e.g., "centralus"), and - // the name of the file you want to transcribe - // through the conversation transcriber. - var subscriptionKey = "YourSubscriptionKey"; - var serviceRegion = "YourServiceRegion"; // e.g., "centralus" - var filename = "YourAudioFile.wav"; - - // create the push stream we need for the speech sdk. - var pushStream = sdk.AudioInputStream.createPushStream(); - - // open the file and push it to the push stream. - fs.createReadStream(filename).on('data', function(arrayBuffer) { - pushStream.write(arrayBuffer.slice()); - }).on('end', function() { - pushStream.close(); - }); - - // we are done with the setup - console.log("Transcribing from: " + filename); - // now create the audio-config pointing to our stream and - // the speech config specifying the language. - var speechConfig = sdk.SpeechConfig.fromSubscription(subscriptionKey, serviceRegion); - var audioConfig = sdk.AudioConfig.fromStreamInput(pushStream); - - // create the conversation transcriber. - var transcriber = new sdk.ConversationTranscriber(speechConfig, audioConfig); - - transcriber.sessionStarted = function(s, e) { - console.log("(sessionStarted) SessionId:" + e.sessionId); - }; - transcriber.sessionStopped = function(s, e) { - console.log("(sessionStopped) SessionId:" + e.sessionId); - }; - transcriber.canceled = function(s, e) { - console.log("(canceled) " + e.errorDetails); - }; - transcriber.transcribed = function(s, e) { - console.log("(transcribed) text: " + e.result.text); - console.log("(transcribed) speakerId: " + e.result.speakerId); - }; - - // Begin conversation transcription - transcriber.startTranscribingAsync( - function () {}, - function (err) { - console.trace("err - starting transcription: " + err); - } - ); -}()); +// pull in the required packages. +import * as sdk from "microsoft-cognitiveservices-speech-sdk"; +import * as fs from "fs"; + +// replace with your own subscription key, +// service region (e.g., "centralus"), and +// the name of the file you want to transcribe +// through the conversation transcriber. +var subscriptionKey = "YourSubscriptionKey"; +var serviceRegion = "YourServiceRegion"; // e.g., "centralus" +var filename = "YourAudioFile.wav"; + +// create the push stream we need for the speech sdk. +var pushStream = sdk.AudioInputStream.createPushStream(); + +// open the file and push it to the push stream. +fs.createReadStream(filename).on('data', function(arrayBuffer) { +pushStream.write(arrayBuffer.slice()); +}).on('end', function() { +pushStream.close(); +}); + +// we are done with the setup +console.log("Transcribing from: " + filename); +// now create the audio-config pointing to our stream and +// the speech config specifying the language. +var speechConfig = sdk.SpeechConfig.fromSubscription(subscriptionKey, serviceRegion); +var audioConfig = sdk.AudioConfig.fromStreamInput(pushStream); + +// create the conversation transcriber. +var transcriber = new sdk.ConversationTranscriber(speechConfig, audioConfig); + +transcriber.sessionStarted = function(s, e) { + console.log("(sessionStarted) SessionId:" + e.sessionId); +}; +transcriber.sessionStopped = function(s, e) { + console.log("(sessionStopped) SessionId:" + e.sessionId); +}; +transcriber.canceled = function(s, e) { + console.log("(canceled) " + e.errorDetails); +}; +transcriber.transcribed = function(s, e) { + console.log("(transcribed) text: " + e.result.text); + console.log("(transcribed) speakerId: " + e.result.speakerId); +}; + +// Begin conversation transcription +transcriber.startTranscribingAsync( + function () {}, + function (err) { + console.trace("err - starting transcription: " + err); + } +); diff --git a/quickstart/javascript/node/conversation-transcription/package.json b/quickstart/javascript/node/conversation-transcription/package.json index e15763069..e270e5514 100644 --- a/quickstart/javascript/node/conversation-transcription/package.json +++ b/quickstart/javascript/node/conversation-transcription/package.json @@ -4,6 +4,7 @@ "version": "1.0.0", "description": "Quickstart for the Microsoft Speech SDK on Node.js", "main": "index.js", + "type": "module", "scripts": { "test": "echo \"Error: no test specified\" && exit 1" }, @@ -11,6 +12,6 @@ "license": "MIT", "dependencies": { "https-proxy-agent": "^3.0.0", - "microsoft-cognitiveservices-speech-sdk": "^1.33.0" + "microsoft-cognitiveservices-speech-sdk": "^1.33.1" } } diff --git a/quickstart/javascript/node/from-file/index.js b/quickstart/javascript/node/from-file/index.js index 854634779..0eead54eb 100644 --- a/quickstart/javascript/node/from-file/index.js +++ b/quickstart/javascript/node/from-file/index.js @@ -1,13 +1,11 @@ // Copyright (c) Microsoft Corporation. All rights reserved. // Licensed under the MIT license. -(function() { // - "use strict"; // pull in the required packages. - var sdk = require("microsoft-cognitiveservices-speech-sdk"); - var fs = require("fs"); + import * as sdk from "microsoft-cognitiveservices-speech-sdk"; + import * as fs from "fs"; // replace with your own subscription key, // service region (e.g., "westus"), and @@ -57,5 +55,4 @@ }); // -}()); \ No newline at end of file diff --git a/quickstart/javascript/node/from-file/package.json b/quickstart/javascript/node/from-file/package.json index e15763069..e270e5514 100644 --- a/quickstart/javascript/node/from-file/package.json +++ b/quickstart/javascript/node/from-file/package.json @@ -4,6 +4,7 @@ "version": "1.0.0", "description": "Quickstart for the Microsoft Speech SDK on Node.js", "main": "index.js", + "type": "module", "scripts": { "test": "echo \"Error: no test specified\" && exit 1" }, @@ -11,6 +12,6 @@ "license": "MIT", "dependencies": { "https-proxy-agent": "^3.0.0", - "microsoft-cognitiveservices-speech-sdk": "^1.33.0" + "microsoft-cognitiveservices-speech-sdk": "^1.33.1" } } diff --git a/quickstart/javascript/node/meeting-transcription/index.js b/quickstart/javascript/node/meeting-transcription/index.js index eac60d45a..5729559f7 100644 --- a/quickstart/javascript/node/meeting-transcription/index.js +++ b/quickstart/javascript/node/meeting-transcription/index.js @@ -1,99 +1,94 @@ // Copyright (c) Microsoft Corporation. All rights reserved. // Licensed under the MIT license. -(function() { - "use strict"; - // pull in the required packages. - var sdk = require("microsoft-cognitiveservices-speech-sdk"); - var fs = require("fs"); - - // replace with your own subscription key, - // service region (e.g., "centralus"), and - // the name of the file you want to transcribe - // through the meeting transcriber. - var subscriptionKey = "YourSubscriptionKey"; - var serviceRegion = "YourServiceRegion"; // e.g., "centralus" - var filename = "YourAudioFile.wav"; // 8-channel audio - - // create the push stream we need for the speech sdk. - var pushStream = sdk.AudioInputStream.createPushStream(sdk.AudioStreamFormat.getWaveFormatPCM(16000, 16, 8)) - - // open the file and push it to the push stream. - fs.createReadStream(filename).on('data', function(arrayBuffer) { - pushStream.write(arrayBuffer.slice()); - }).on('end', function() { - pushStream.close(); - }); - - // we are done with the setup - console.log("Transcribing from: " + filename); - // now create the audio-config pointing to our stream and - // the speech config specifying the language. - var speechTranslationConfig = sdk.SpeechTranslationConfig.fromSubscription(subscriptionKey, serviceRegion); - var audioConfig = sdk.AudioConfig.fromStreamInput(pushStream); +import * as sdk from "microsoft-cognitiveservices-speech-sdk"; +import * as fs from "fs"; + +// replace with your own subscription key, +// service region (e.g., "centralus"), and +// the name of the file you want to transcribe +// through the meeting transcriber. +var subscriptionKey = "YourSubscriptionKey"; +var serviceRegion = "YourServiceRegion"; // e.g., "centralus" +var filename = "YourAudioFile.wav"; // 8-channel audio + +// create the push stream we need for the speech sdk. +var pushStream = sdk.AudioInputStream.createPushStream(sdk.AudioStreamFormat.getWaveFormatPCM(16000, 16, 8)) - // setting the recognition language to English. - speechTranslationConfig.speechRecognitionLanguage = "en-US"; +// open the file and push it to the push stream. +fs.createReadStream(filename).on('data', function(arrayBuffer) { + pushStream.write(arrayBuffer.slice()); +}).on('end', function() { + pushStream.close(); +}); - // create the meeting object tracking participants - var meeting = sdk.Meeting.createMeetingAsync(speechTranslationConfig, "myMeeting"); +// we are done with the setup +console.log("Transcribing from: " + filename); +// now create the audio-config pointing to our stream and +// the speech config specifying the language. +var speechTranslationConfig = sdk.SpeechTranslationConfig.fromSubscription(subscriptionKey, serviceRegion); +var audioConfig = sdk.AudioConfig.fromStreamInput(pushStream); - // create the meeting transcriber. - var transcriber = new sdk.MeetingTranscriber(audioConfig); +// setting the recognition language to English. +speechTranslationConfig.speechRecognitionLanguage = "en-US"; - // attach the transcriber to the meeting - transcriber.joinMeetingAsync(meeting, - function () { - // add first participant with voice signature from enrollment step - var voiceSignatureUser1 = "{" + - "Version: 0," + - "Tag: \"<>\"," + - "Data: \"<>\"" + - "}"; - var user1 = sdk.Participant.From("user1@example.com", "en-us", voiceSignatureUser1); - meeting.addParticipantAsync(user1, - function () { - // add second participant with voice signature from enrollment step - var voiceSignatureUser2 = "{" + - "Version: 0," + - "Tag: \"<>\"," + - "Data: \"<>\"" + - "}"; - var user2 = sdk.Participant.From("user2@example.com", "en-us", voiceSignatureUser2); - meeting.addParticipantAsync(user2, - function () { - transcriber.sessionStarted = function(s, e) { - console.log("(sessionStarted)"); - }; - transcriber.sessionStopped = function(s, e) { - console.log("(sessionStopped)"); - }; - transcriber.canceled = function(s, e) { - console.log("(canceled)"); - }; - transcriber.transcribed = function(s, e) { - console.log("(transcribed) text: " + e.result.text); - console.log("(transcribed) speakerId: " + e.result.speakerId); - }; +// create the meeting object tracking participants +var meeting = sdk.Meeting.createMeetingAsync(speechTranslationConfig, "myMeeting"); - // Begin meeting transcription - transcriber.startTranscribingAsync( - function () { }, - function (err) { - console.trace("err - starting transcription: " + err); - }); - }, - function (err) { - console.trace("err - adding user1: " + err); - }); +// create the meeting transcriber. +var transcriber = new sdk.MeetingTranscriber(audioConfig); + +// attach the transcriber to the meeting +transcriber.joinMeetingAsync(meeting, + function () { + // add first participant with voice signature from enrollment step + var voiceSignatureUser1 = "{" + + "Version: 0," + + "Tag: \"<>\"," + + "Data: \"<>\"" + + "}"; + var user1 = sdk.Participant.From("user1@example.com", "en-us", voiceSignatureUser1); + meeting.addParticipantAsync(user1, + function () { + // add second participant with voice signature from enrollment step + var voiceSignatureUser2 = "{" + + "Version: 0," + + "Tag: \"<>\"," + + "Data: \"<>\"" + + "}"; + var user2 = sdk.Participant.From("user2@example.com", "en-us", voiceSignatureUser2); + meeting.addParticipantAsync(user2, + function () { + transcriber.sessionStarted = function(s, e) { + console.log("(sessionStarted)"); + }; + transcriber.sessionStopped = function(s, e) { + console.log("(sessionStopped)"); + }; + transcriber.canceled = function(s, e) { + console.log("(canceled)"); + }; + transcriber.transcribed = function(s, e) { + console.log("(transcribed) text: " + e.result.text); + console.log("(transcribed) speakerId: " + e.result.speakerId); + }; + + // Begin meeting transcription + transcriber.startTranscribingAsync( + function () { }, + function (err) { + console.trace("err - starting transcription: " + err); + }); }, function (err) { - console.trace("err - adding user2: " + err); + console.trace("err - adding user1: " + err); }); }, function (err) { - console.trace("err - " + err); + console.trace("err - adding user2: " + err); }); - -}()); +}, +function (err) { + console.trace("err - " + err); +}); diff --git a/quickstart/javascript/node/meeting-transcription/package.json b/quickstart/javascript/node/meeting-transcription/package.json index e15763069..e270e5514 100644 --- a/quickstart/javascript/node/meeting-transcription/package.json +++ b/quickstart/javascript/node/meeting-transcription/package.json @@ -4,6 +4,7 @@ "version": "1.0.0", "description": "Quickstart for the Microsoft Speech SDK on Node.js", "main": "index.js", + "type": "module", "scripts": { "test": "echo \"Error: no test specified\" && exit 1" }, @@ -11,6 +12,6 @@ "license": "MIT", "dependencies": { "https-proxy-agent": "^3.0.0", - "microsoft-cognitiveservices-speech-sdk": "^1.33.0" + "microsoft-cognitiveservices-speech-sdk": "^1.33.1" } } diff --git a/quickstart/javascript/node/speaker-recognition/identification/independent-identification.js b/quickstart/javascript/node/speaker-recognition/identification/independent-identification.js index 894327f70..8a6056480 100644 --- a/quickstart/javascript/node/speaker-recognition/identification/independent-identification.js +++ b/quickstart/javascript/node/speaker-recognition/identification/independent-identification.js @@ -1,54 +1,50 @@ -(async function() { - "use strict"; - - // pull in the required packages. - const sdk = require("microsoft-cognitiveservices-speech-sdk"); - const fs = require("fs"); - - // replace with your own subscription key, - // service region (e.g., "westus"), and - // the name of the files you want to use - // to enroll and then identify the speaker. - const subscriptionKey = "YourSubscriptionKey"; - const serviceRegion = "YourSubscriptionRegion"; // e.g., "westus" - const enrollFile = "aboutSpeechSdk.wav"; // 16000 Hz, Mono - const identificationFile = "TalkForAFewSeconds16.wav"; // 16000 Hz, Mono - - // now create the speech config with the credentials for the subscription - const speechConfig = sdk.SpeechConfig.fromSubscription(subscriptionKey, serviceRegion); - const client = new sdk.VoiceProfileClient(speechConfig); - const locale = "en-us"; - - // we are done with the setup - try { - const profile = await client.createProfileAsync(sdk.VoiceProfileType.TextIndependentIdentification, locale); - const audioConfig = sdk.AudioConfig.fromWavFileInput(fs.readFileSync(enrollFile)); - - console.log("Profile id: " + profile.profileId +" created, now enrolling using file: " + enrollFile); - const enrollResult = await client.enrollProfileAsync(profile, audioConfig); - - console.log("(Enrollment result) Reason: " + sdk.ResultReason[enrollResult.reason]); - const idConfig = sdk.AudioConfig.fromWavFileInput(fs.readFileSync(identificationFile)); - const recognizer = new sdk.SpeakerRecognizer(speechConfig, idConfig); - const model = sdk.SpeakerIdentificationModel.fromProfiles([profile]); - const identificationResult = await recognizer.recognizeOnceAsync(model); - - var reason = identificationResult.reason; - console.log("(Identification result) Reason: " + sdk.ResultReason[reason]); - - if( reason === sdk.ResultReason.Canceled ) { - const cancellationDetails = sdk.SpeakerRecognitionCancellationDetails.fromResult(identificationResult); - console.log("(Identification canceled) Error Details: " + cancellationDetails.errorDetails); - console.log("(Identification canceled) Error Code: " + cancellationDetails.errorCode); - } else { - console.log("(Identification result) Profile Id: " + identificationResult.profileId); - console.log("(Identification result) Score: " + identificationResult.score); - } - - const deleteResult = await client.deleteProfileAsync(profile); - console.log("(Delete profile result) Reason: " + sdk.ResultReason[deleteResult.reason]); - - } catch (err) { - console.log("ERROR during operation: " + err); +// pull in the required packages. +import * as sdk from "microsoft-cognitiveservices-speech-sdk"; +import * as fs from "fs"; + +// replace with your own subscription key, +// service region (e.g., "westus"), and +// the name of the files you want to use +// to enroll and then identify the speaker. +const subscriptionKey = "YourSubscriptionKey"; +const serviceRegion = "YourSubscriptionRegion"; // e.g., "westus" +const enrollFile = "aboutSpeechSdk.wav"; // 16000 Hz, Mono +const identificationFile = "TalkForAFewSeconds16.wav"; // 16000 Hz, Mono + +// now create the speech config with the credentials for the subscription +const speechConfig = sdk.SpeechConfig.fromSubscription(subscriptionKey, serviceRegion); +const client = new sdk.VoiceProfileClient(speechConfig); +const locale = "en-us"; + +// we are done with the setup +try { + const profile = await client.createProfileAsync(sdk.VoiceProfileType.TextIndependentIdentification, locale); + const audioConfig = sdk.AudioConfig.fromWavFileInput(fs.readFileSync(enrollFile)); + + console.log("Profile id: " + profile.profileId +" created, now enrolling using file: " + enrollFile); + const enrollResult = await client.enrollProfileAsync(profile, audioConfig); + + console.log("(Enrollment result) Reason: " + sdk.ResultReason[enrollResult.reason]); + const idConfig = sdk.AudioConfig.fromWavFileInput(fs.readFileSync(identificationFile)); + const recognizer = new sdk.SpeakerRecognizer(speechConfig, idConfig); + const model = sdk.SpeakerIdentificationModel.fromProfiles([profile]); + const identificationResult = await recognizer.recognizeOnceAsync(model); + + var reason = identificationResult.reason; + console.log("(Identification result) Reason: " + sdk.ResultReason[reason]); + + if( reason === sdk.ResultReason.Canceled ) { + const cancellationDetails = sdk.SpeakerRecognitionCancellationDetails.fromResult(identificationResult); + console.log("(Identification canceled) Error Details: " + cancellationDetails.errorDetails); + console.log("(Identification canceled) Error Code: " + cancellationDetails.errorCode); + } else { + console.log("(Identification result) Profile Id: " + identificationResult.profileId); + console.log("(Identification result) Score: " + identificationResult.score); } -}()); + + const deleteResult = await client.deleteProfileAsync(profile); + console.log("(Delete profile result) Reason: " + sdk.ResultReason[deleteResult.reason]); + +} catch (err) { + console.log("ERROR during operation: " + err); +} diff --git a/quickstart/javascript/node/speaker-recognition/identification/package.json b/quickstart/javascript/node/speaker-recognition/identification/package.json index dbc0ab8b1..02b3c5bdf 100644 --- a/quickstart/javascript/node/speaker-recognition/identification/package.json +++ b/quickstart/javascript/node/speaker-recognition/identification/package.json @@ -4,6 +4,7 @@ "version": "1.0.0", "description": "Quickstart for the Microsoft Speech SDK on Node.js", "main": "independent-identification.js", + "type": "module", "scripts": { "test": "echo \"Error: no test specified\" && exit 1" }, @@ -11,6 +12,6 @@ "license": "MIT", "dependencies": { "https-proxy-agent": "^3.0.0", - "microsoft-cognitiveservices-speech-sdk": "^1.33.0" + "microsoft-cognitiveservices-speech-sdk": "^1.33.1" } } diff --git a/quickstart/javascript/node/speaker-recognition/verification/dependent-verification.js b/quickstart/javascript/node/speaker-recognition/verification/dependent-verification.js index f2e514e7f..f14799c67 100644 --- a/quickstart/javascript/node/speaker-recognition/verification/dependent-verification.js +++ b/quickstart/javascript/node/speaker-recognition/verification/dependent-verification.js @@ -1,63 +1,58 @@ -(async function() { - "use strict"; - - // pull in the required packages. - const sdk = require("microsoft-cognitiveservices-speech-sdk"); - const fs = require("fs"); +// pull in the required packages. +import * as sdk from "microsoft-cognitiveservices-speech-sdk"; +import * as fs from "fs"; - // replace with your own subscription key, - // service region (e.g., "westus"), and - // the name of the files you want to use - // to enroll and then verify the speaker. - // Note that three different samples are - // necessary to enroll for verification. - const subscriptionKey = "YourSubscriptionKey"; - const serviceRegion = "YourSubscriptionRegion"; // e.g., "westus" - const enrollFiles = ["myVoiceIsMyPassportVerifyMe01.wav","myVoiceIsMyPassportVerifyMe02.wav","myVoiceIsMyPassportVerifyMe03.wav"]; // 16000 Hz, Mono - const verificationFile = "myVoiceIsMyPassportVerifyMe04.wav"; // 16000 Hz, Mono - - // now create the speech config with the credentials for the subscription - const speechConfig = sdk.SpeechConfig.fromSubscription(subscriptionKey, serviceRegion); - const client = new sdk.VoiceProfileClient(speechConfig); - const locale = "en-us"; +// replace with your own subscription key, +// service region (e.g., "westus"), and +// the name of the files you want to use +// to enroll and then verify the speaker. +// Note that three different samples are +// necessary to enroll for verification. +const subscriptionKey = "YourSubscriptionKey"; +const serviceRegion = "YourSubscriptionRegion"; // e.g., "westus" +const enrollFiles = ["myVoiceIsMyPassportVerifyMe01.wav","myVoiceIsMyPassportVerifyMe02.wav","myVoiceIsMyPassportVerifyMe03.wav"]; // 16000 Hz, Mono +const verificationFile = "myVoiceIsMyPassportVerifyMe04.wav"; // 16000 Hz, Mono - const getAudioConfigFromFile = function (file) { - return sdk.AudioConfig.fromWavFileInput(fs.readFileSync(file)); - }; +// now create the speech config with the credentials for the subscription +const speechConfig = sdk.SpeechConfig.fromSubscription(subscriptionKey, serviceRegion); +const client = new sdk.VoiceProfileClient(speechConfig); +const locale = "en-us"; - try { - // we are done with the setup, so create a dependent verification specific profile - const profile = await client.createProfileAsync(sdk.VoiceProfileType.TextDependentVerification, locale); +const getAudioConfigFromFile = function (file) { + return sdk.AudioConfig.fromWavFileInput(fs.readFileSync(file)); +}; - console.log("Profile id: " + profile.profileId +" created, now enrolling using files beginning with: " + enrollFiles[0]); - // create audio configs for each of the enrollment files to use for each of the enrollment steps - for (const enrollFile of enrollFiles) { - const enrollConfig = getAudioConfigFromFile(enrollFile); - const enrollResult = await client.enrollProfileAsync(profile, enrollConfig); - console.log("(Enrollment result) Reason: " + sdk.ResultReason[enrollResult.reason]); - } - const verificationConfig = getAudioConfigFromFile(verificationFile); - const recognizer = new sdk.SpeakerRecognizer(speechConfig, verificationConfig); - - // For verification scenarios, create a SpeakerVerificationModel. (Note that identification scenarios use a different type and API here.) - const model = sdk.SpeakerVerificationModel.fromProfile(profile); - const verificationResult = await recognizer.recognizeOnceAsync(model); - const reason = verificationResult.reason; - console.log("(Verification result) Reason: " + sdk.ResultReason[reason]); - if( reason === sdk.ResultReason.Canceled ) { - const cancellationDetails = sdk.SpeakerRecognitionCancellationDetails.fromResult(verificationResult); - console.log("(Verification canceled) Error Details: " + cancellationDetails.errorDetails); - console.log("(Verification canceled) Error Code: " + cancellationDetails.errorCode); - } else { - console.log("(Verification result) Profile Id: " + verificationResult.profileId); - console.log("(Verification result) Score: " + verificationResult.score); - } +try { + // we are done with the setup, so create a dependent verification specific profile + const profile = await client.createProfileAsync(sdk.VoiceProfileType.TextDependentVerification, locale); - // Delete voice profile after we're done with this scenario - const deleteResult = await client.deleteProfileAsync(profile); - console.log("(Delete profile result) Reason: " + sdk.ResultReason[deleteResult.reason]); - } catch (err) { - console.log("ERROR: " + err); + console.log("Profile id: " + profile.profileId +" created, now enrolling using files beginning with: " + enrollFiles[0]); + // create audio configs for each of the enrollment files to use for each of the enrollment steps + for (const enrollFile of enrollFiles) { + const enrollConfig = getAudioConfigFromFile(enrollFile); + const enrollResult = await client.enrollProfileAsync(profile, enrollConfig); + console.log("(Enrollment result) Reason: " + sdk.ResultReason[enrollResult.reason]); } + const verificationConfig = getAudioConfigFromFile(verificationFile); + const recognizer = new sdk.SpeakerRecognizer(speechConfig, verificationConfig); -}()); + // For verification scenarios, create a SpeakerVerificationModel. (Note that identification scenarios use a different type and API here.) + const model = sdk.SpeakerVerificationModel.fromProfile(profile); + const verificationResult = await recognizer.recognizeOnceAsync(model); + const reason = verificationResult.reason; + console.log("(Verification result) Reason: " + sdk.ResultReason[reason]); + if( reason === sdk.ResultReason.Canceled ) { + const cancellationDetails = sdk.SpeakerRecognitionCancellationDetails.fromResult(verificationResult); + console.log("(Verification canceled) Error Details: " + cancellationDetails.errorDetails); + console.log("(Verification canceled) Error Code: " + cancellationDetails.errorCode); + } else { + console.log("(Verification result) Profile Id: " + verificationResult.profileId); + console.log("(Verification result) Score: " + verificationResult.score); + } + + // Delete voice profile after we're done with this scenario + const deleteResult = await client.deleteProfileAsync(profile); + console.log("(Delete profile result) Reason: " + sdk.ResultReason[deleteResult.reason]); +} catch (err) { + console.log("ERROR: " + err); +} diff --git a/quickstart/javascript/node/speaker-recognition/verification/package.json b/quickstart/javascript/node/speaker-recognition/verification/package.json index 758196679..3302dfa2b 100644 --- a/quickstart/javascript/node/speaker-recognition/verification/package.json +++ b/quickstart/javascript/node/speaker-recognition/verification/package.json @@ -4,6 +4,7 @@ "version": "1.0.0", "description": "Quickstart for the Microsoft Speech SDK on Node.js", "main": "dependent-verification.js", + "type": "module", "scripts": { "test": "echo \"Error: no test specified\" && exit 1" }, @@ -11,6 +12,6 @@ "license": "MIT", "dependencies": { "https-proxy-agent": "^3.0.0", - "microsoft-cognitiveservices-speech-sdk": "^1.33.0" + "microsoft-cognitiveservices-speech-sdk": "^1.33.1" } } diff --git a/quickstart/javascript/node/text-to-speech/index.js b/quickstart/javascript/node/text-to-speech/index.js index ba60ee7d3..b41d20ab0 100644 --- a/quickstart/javascript/node/text-to-speech/index.js +++ b/quickstart/javascript/node/text-to-speech/index.js @@ -1,58 +1,53 @@ // Copyright (c) Microsoft Corporation. All rights reserved. // Licensed under the MIT license. -(function() { - // - "use strict"; - - // pull in the required packages. - var sdk = require("microsoft-cognitiveservices-speech-sdk"); - var readline = require("readline"); - - // replace with your own subscription key, - // service region (e.g., "westus"), and - // the name of the file you save the synthesized audio. - var subscriptionKey = "YourSubscriptionKey"; - var serviceRegion = "YourServiceRegion"; // e.g., "westus" - var filename = "YourAudioFile.wav"; - - // we are done with the setup - - // now create the audio-config pointing to our stream and - // the speech config specifying the language. - var audioConfig = sdk.AudioConfig.fromAudioFileOutput(filename); - var speechConfig = sdk.SpeechConfig.fromSubscription(subscriptionKey, serviceRegion); - - // create the speech synthesizer. - var synthesizer = new sdk.SpeechSynthesizer(speechConfig, audioConfig); - - var rl = readline.createInterface({ - input: process.stdin, - output: process.stdout - }); +// + +// pull in the required packages. +import * as sdk from "microsoft-cognitiveservices-speech-sdk"; +import * as readline from "readline"; + +// replace with your own subscription key, +// service region (e.g., "westus"), and +// the name of the file you save the synthesized audio. +var subscriptionKey = "YourSubscriptionKey"; +var serviceRegion = "YourServiceRegion"; // e.g., "westus" +var filename = "YourAudioFile.wav"; + +// we are done with the setup + +// now create the audio-config pointing to our stream and +// the speech config specifying the language. +var audioConfig = sdk.AudioConfig.fromAudioFileOutput(filename); +var speechConfig = sdk.SpeechConfig.fromSubscription(subscriptionKey, serviceRegion); + +// create the speech synthesizer. +var synthesizer = new sdk.SpeechSynthesizer(speechConfig, audioConfig); + +var rl = readline.createInterface({ + input: process.stdin, + output: process.stdout +}); - rl.question("Type some text that you want to speak...\n> ", function (text) { - rl.close(); - // start the synthesizer and wait for a result. - synthesizer.speakTextAsync(text, - function (result) { - if (result.reason === sdk.ResultReason.SynthesizingAudioCompleted) { - console.log("synthesis finished."); - } else { - console.error("Speech synthesis canceled, " + result.errorDetails + - "\nDid you update the subscription info?"); - } - synthesizer.close(); - synthesizer = undefined; - }, - function (err) { - console.trace("err - " + err); - synthesizer.close(); - synthesizer = undefined; - }); - console.log("Now synthesizing to: " + filename); +rl.question("Type some text that you want to speak...\n> ", function (text) { + rl.close(); + // start the synthesizer and wait for a result. + synthesizer.speakTextAsync(text, + function (result) { + if (result.reason === sdk.ResultReason.SynthesizingAudioCompleted) { + console.log("synthesis finished."); + } else { + console.error("Speech synthesis canceled, " + result.errorDetails + + "\nDid you update the subscription info?"); + } + synthesizer.close(); + synthesizer = undefined; + }, + function (err) { + console.trace("err - " + err); + synthesizer.close(); + synthesizer = undefined; }); - // - -}()); - \ No newline at end of file + console.log("Now synthesizing to: " + filename); +}); +// diff --git a/quickstart/javascript/node/text-to-speech/package.json b/quickstart/javascript/node/text-to-speech/package.json index 6b8d6fe5a..814019503 100644 --- a/quickstart/javascript/node/text-to-speech/package.json +++ b/quickstart/javascript/node/text-to-speech/package.json @@ -4,6 +4,7 @@ "version": "1.0.0", "description": "Text-to-speech quickstart for the Microsoft Speech SDK on Node.js", "main": "index.js", + "type": "module", "scripts": { "test": "echo \"Error: no test specified\" && exit 1" }, @@ -11,7 +12,7 @@ "license": "MIT", "dependencies": { "https-proxy-agent": "^3.0.0", - "microsoft-cognitiveservices-speech-sdk": "^1.33.0", + "microsoft-cognitiveservices-speech-sdk": "^1.33.1", "readline": "^1.3.0" } } diff --git a/samples/js/node/filePushStream.js b/samples/js/node/filePushStream.js index 4a9ddd5f9..227024a14 100644 --- a/samples/js/node/filePushStream.js +++ b/samples/js/node/filePushStream.js @@ -2,11 +2,8 @@ // Licensed under the MIT license. // pull in the required packages. -var sdk = require("microsoft-cognitiveservices-speech-sdk"); -var fs = require("fs"); - -(function() { -"use strict"; +import * as sdk from "microsoft-cognitiveservices-speech-sdk"; +import * as fs from "fs"; function ReadInt32(fd) { var buffer = Buffer.alloc(4); @@ -44,78 +41,75 @@ function ReadString(fd, length) { return buffer.toString(); } - module.exports = { - openPushStream: function(filename) { - - // Get the wave header for the file. - var wavFileHeader = module.exports.readWavFileHeader(filename); - - var format; - - switch (wavFileHeader.tag) - { - case 1: // PCM - format = sdk.AudioFormatTag.PCM; - break; - case 6: - format = sdk.AudioFormatTag.ALaw; - break; - case 7: - format = sdk.AudioFormatTag.MuLaw; - break; - default: - throw new Error("Wave format " + wavFileHeader.tag + " is not supported"); - } - - // Create the format for PCM Audio. - var format = sdk.AudioStreamFormat.getWaveFormat(wavFileHeader.framerate, wavFileHeader.bitsPerSample, wavFileHeader.nChannels, format); - - // create the push stream we need for the speech sdk. - var pushStream = sdk.AudioInputStream.createPushStream(format); - - // open the file and push it to the push stream. - // Notice: we skip 44 bytes for the typical wav header. - fs.createReadStream(filename, { start: 44}).on('data', function(arrayBuffer) { - pushStream.write(arrayBuffer.slice()); - }).on('end', function() { - pushStream.close(); - }); - - return pushStream; - }, - - readWavFileHeader: function (audioFileName) { - var fd = fs.openSync(audioFileName, 'r'); - - if(ReadString(fd, 4) != "RIFF") { - throw "Error reading .wav file header. Expected 'RIFF' tag."; - } - // File length - ReadInt32(fd); - if(ReadString(fd, 4) != "WAVE") { - throw "Error reading .wav file header. Expected 'WAVE' tag."; - } - if(ReadString(fd, 4) != "fmt ") { - throw "Error reading .wav file header. Expected 'fmt ' tag."; - } - // Format size - var formatSize = ReadInt32(fd); - if (formatSize > 16) { - throw "Error reading .wav file header. Expected format size 16 bytes. Actual size: " + String(formatSize); - } - // Format tag - var tag = ReadUInt16(fd); - var nChannels = ReadUInt16(fd); - var framerate = ReadUInt32(fd); - // Average bytes per second - ReadUInt32(fd); - // Block align - ReadUInt16(fd); - var bitsPerSample = ReadUInt16(fd); - - fs.closeSync(fd); - - return { framerate : framerate, bitsPerSample : bitsPerSample, nChannels : nChannels, tag: tag }; - } +export const openPushStream = (filename) => { + + // Get the wave header for the file. + var wavFileHeader = readWavFileHeader(filename); + + var format; + + switch (wavFileHeader.tag) + { + case 1: // PCM + format = sdk.AudioFormatTag.PCM; + break; + case 6: + format = sdk.AudioFormatTag.ALaw; + break; + case 7: + format = sdk.AudioFormatTag.MuLaw; + break; + default: + throw new Error("Wave format " + wavFileHeader.tag + " is not supported"); + } + + // Create the format for PCM Audio. + var format = sdk.AudioStreamFormat.getWaveFormat(wavFileHeader.framerate, wavFileHeader.bitsPerSample, wavFileHeader.nChannels, format); + + // create the push stream we need for the speech sdk. + var pushStream = sdk.AudioInputStream.createPushStream(format); + + // open the file and push it to the push stream. + // Notice: we skip 44 bytes for the typical wav header. + fs.createReadStream(filename, { start: 44}).on('data', function(arrayBuffer) { + pushStream.write(arrayBuffer.slice()); + }).on('end', function() { + pushStream.close(); + }); + + return pushStream; +}; + +export const readWavFileHeader = (audioFileName) => { + var fd = fs.openSync(audioFileName, 'r'); + + if(ReadString(fd, 4) != "RIFF") { + throw "Error reading .wav file header. Expected 'RIFF' tag."; + } + // File length + ReadInt32(fd); + if(ReadString(fd, 4) != "WAVE") { + throw "Error reading .wav file header. Expected 'WAVE' tag."; } -}()); \ No newline at end of file + if(ReadString(fd, 4) != "fmt ") { + throw "Error reading .wav file header. Expected 'fmt ' tag."; + } + // Format size + var formatSize = ReadInt32(fd); + if (formatSize > 16) { + throw "Error reading .wav file header. Expected format size 16 bytes. Actual size: " + String(formatSize); + } + // Format tag + var tag = ReadUInt16(fd); + var nChannels = ReadUInt16(fd); + var framerate = ReadUInt32(fd); + // Average bytes per second + ReadUInt32(fd); + // Block align + ReadUInt16(fd); + var bitsPerSample = ReadUInt16(fd); + + fs.closeSync(fd); + + return { framerate : framerate, bitsPerSample : bitsPerSample, nChannels : nChannels, tag: tag }; +}; diff --git a/samples/js/node/index.js b/samples/js/node/index.js index 14b9d7f3b..ce180f393 100644 --- a/samples/js/node/index.js +++ b/samples/js/node/index.js @@ -1,68 +1,63 @@ // Copyright (c) Microsoft Corporation. All rights reserved. // Licensed under the MIT license. -(function() { - "use strict"; - - var settings = require("./settings"); - var speech = require("./speech"); - var intent = require("./intent"); - var translate = require("./translation"); - var synthesis = require("./synthesis"); - var pronunciationAssessmentContinue = require("./pronunciationAssessmentContinue"); - var pronunciationAssessment = require("./pronunciationAssessment"); - var pronunciationAssessmentConfiguredWithJson = require("./pronunciationAssessmentConfiguredWithJson"); - var pronunciationAssessmentFromMicrophone = require("./pronunciationAssessmentFromMicrophone"); - - if (process.argv.length > 3) { - settings.filename = process.argv[3]; - } - - if (process.argv.length > 2) { - switch (process.argv[2]) { - case "intent": - console.log("Now recognizing intent from: " + settings.filename); - intent.main(settings); - break; - - case "translate": - console.log("Now translating from: " + settings.filename); - translate.main(settings); - break; +import * as settings from "./settings.js"; +import * as speech from "./speech.js"; +import * as intent from "./intent.js"; +import * as translate from "./translation.js"; +import * as synthesis from "./synthesis.js"; +import * as pronunciationAssessmentContinue from "./pronunciationAssessmentContinue.js"; +import * as pronunciationAssessment from "./pronunciationAssessment.js"; +import * as pronunciationAssessmentConfiguredWithJson from "./pronunciationAssessmentConfiguredWithJson.js"; +import * as pronunciationAssessmentFromMicrophone from "./pronunciationAssessmentFromMicrophone.js"; - case "synthesis": - console.log("Now synthesizing to: " + settings.filename); - synthesis.main(settings, settings.filename); - break; - - case "pronunciationAssessmentContinue": - console.log("Now pronunciation assessment to: " + settings.filename); - pronunciationAssessmentContinue.main(settings); - break; +if (process.argv.length > 3) { + settings.filename = process.argv[3]; +} - case "pronunciationAssessment": - console.log("Now pronunciation assessment to: " + settings.filename); - pronunciationAssessment.main(settings); - break; +if (process.argv.length > 2) { + switch (process.argv[2]) { + case "intent": + console.log("Now recognizing intent from: " + settings.filename); + intent.main(settings); + break; - case "pronunciationAssessmentConfiguredWithJson": - console.log("Now pronunciation assessment to: " + settings.filename); - pronunciationAssessmentConfiguredWithJson.main(settings); - break; + case "translate": + console.log("Now translating from: " + settings.filename); + translate.main(settings); + break; - case "pronunciationAssessmentFromMicrophone": - pronunciationAssessmentFromMicrophone.main(settings); - break; + case "synthesis": + console.log("Now synthesizing to: " + settings.filename); + synthesis.main(settings, settings.filename); + break; + + case "pronunciationAssessmentContinue": + console.log("Now pronunciation assessment to: " + settings.filename); + pronunciationAssessmentContinue.main(settings); + break; - case "speech": - default: - console.log("Now recognizing speech from: " + settings.filename); - speech.main(settings); - break; - } - } - else { - console.log("usage: index.js [speech|intent|translate|synthesis|pronunciationAssessment|pronunciationAssessmentContinue|pronunciationAssessmentConfiguredWithJson|pronunciationAssessmentFromMicrophone] {filename}"); + case "pronunciationAssessment": + console.log("Now pronunciation assessment to: " + settings.filename); + pronunciationAssessment.main(settings); + break; + + case "pronunciationAssessmentConfiguredWithJson": + console.log("Now pronunciation assessment to: " + settings.filename); + pronunciationAssessmentConfiguredWithJson.main(settings); + break; + + case "pronunciationAssessmentFromMicrophone": + pronunciationAssessmentFromMicrophone.main(settings); + break; + + case "speech": + default: + console.log("Now recognizing speech from: " + settings.filename); + speech.main(settings); + break; } -}()); - \ No newline at end of file +} +else { + console.log("usage: index.js [speech|intent|translate|synthesis|pronunciationAssessment|pronunciationAssessmentContinue|pronunciationAssessmentConfiguredWithJson|pronunciationAssessmentFromMicrophone] {filename}"); +} diff --git a/samples/js/node/intent.js b/samples/js/node/intent.js index 62b81a52d..eac87d7d5 100644 --- a/samples/js/node/intent.js +++ b/samples/js/node/intent.js @@ -2,44 +2,40 @@ // Licensed under the MIT license. // pull in the required packages. -var sdk = require("microsoft-cognitiveservices-speech-sdk"); -var filePushStream = require("./filePushStream"); +import * as sdk from "microsoft-cognitiveservices-speech-sdk"; +import * as filePushStream from "./filePushStream.js"; -(function() { -"use strict"; +export const main = (settings) => { - module.exports = { - main: function(settings) { + // now create the audio-config pointing to our stream and + // the speech config specifying the language. + var audioStream = filePushStream.openPushStream(settings.filename); + var audioConfig = sdk.AudioConfig.fromStreamInput(audioStream); + var intentConfig = sdk.SpeechConfig.fromSubscription(settings.luSubscriptionKey, settings.luServiceRegion); - // now create the audio-config pointing to our stream and - // the speech config specifying the language. - var audioStream = filePushStream.openPushStream(settings.filename); - var audioConfig = sdk.AudioConfig.fromStreamInput(audioStream); - var intentConfig = sdk.SpeechConfig.fromSubscription(settings.luSubscriptionKey, settings.luServiceRegion); + // setting the recognition language to English. + intentConfig.speechRecognitionLanguage = settings.language; - // setting the recognition language to English. - intentConfig.speechRecognitionLanguage = settings.language; + // create the translation recognizer. + var recognizer = new sdk.IntentRecognizer(intentConfig, audioConfig); - // create the translation recognizer. - var recognizer = new sdk.IntentRecognizer(intentConfig, audioConfig); - - // Set up a Language Understanding Model from Language Understanding Intelligent Service (LUIS). - // See https://www.luis.ai/home for more information on LUIS. - if (settings.luAppId !== "" && settings.luAppId !== "YourLanguageUnderstandingAppId") { + // Set up a Language Understanding Model from Language Understanding Intelligent Service (LUIS). + // See https://www.luis.ai/home for more information on LUIS. + if (settings.luAppId !== "" && settings.luAppId !== "YourLanguageUnderstandingAppId") { var lm = sdk.LanguageUnderstandingModel.fromAppId(settings.luAppId); recognizer.addAllIntents(lm); - } - - // Before beginning speech recognition, setup the callbacks to be invoked when an event occurs. + } + + // Before beginning speech recognition, setup the callbacks to be invoked when an event occurs. - // The event recognizing signals that an intermediate recognition result is received. - // You will receive one or more recognizing events as a speech phrase is recognized, with each containing - // more recognized speech. The event will contain the text for the recognition since the last phrase was recognized. - recognizer.recognizing = function (s, e) { + // The event recognizing signals that an intermediate recognition result is received. + // You will receive one or more recognizing events as a speech phrase is recognized, with each containing + // more recognized speech. The event will contain the text for the recognition since the last phrase was recognized. + recognizer.recognizing = function (s, e) { var str = "(recognizing) Reason: " + sdk.ResultReason[e.result.reason] + " Text: " + e.result.text; console.log(str); - }; + }; // The event signals that the service has stopped processing speech. // https://docs.microsoft.com/javascript/api/microsoft-cognitiveservices-speech-sdk/speechrecognitioncanceledeventargs?view=azure-node-latest @@ -114,14 +110,11 @@ var filePushStream = require("./filePushStream"); // start the recognizer and wait for a result. recognizer.recognizeOnceAsync( function (result) { - recognizer.close(); - recognizer = undefined; + recognizer.close(); + recognizer = undefined; }, function (err) { - recognizer.close(); - recognizer = undefined; + recognizer.close(); + recognizer = undefined; }); - } - - } -}()); +}; diff --git a/samples/js/node/micSTT.js b/samples/js/node/micSTT.js index 4c4e63445..54e964db2 100644 --- a/samples/js/node/micSTT.js +++ b/samples/js/node/micSTT.js @@ -1,10 +1,10 @@ // Copyright (c) Microsoft Corporation. All rights reserved. // Licensed under the MIT license. -const MicToSpeech = require("mic-to-speech"); -const sdk = require("microsoft-cognitiveservices-speech-sdk"); -const wav = require("wav"); -const settings = require('./settings'); +import * as MicToSpeech from "mic-to-speech"; +import * as sdk from "microsoft-cognitiveservices-speech-sdk"; +import * as wav from "wav"; +import * as settings from "./settings.js"; const API_KEY = settings.subscriptionKey; const region = settings.serviceRegion; diff --git a/samples/js/node/package.json b/samples/js/node/package.json index 85a7bc7c8..df9f675b6 100644 --- a/samples/js/node/package.json +++ b/samples/js/node/package.json @@ -4,6 +4,7 @@ "version": "1.0.0", "description": "Samples for the Microsoft Speech SDK on Node.js", "main": "index.js", + "type": "module", "scripts": { "test": "echo \"Error: no test specified\" && exit 1" }, @@ -16,7 +17,7 @@ "lodash.foreach": "^4.5.0", "lodash.sum": "^4.0.2", "mic-to-speech": "^1.0.1", - "microsoft-cognitiveservices-speech-sdk": "^1.24.1", + "microsoft-cognitiveservices-speech-sdk": "^1.33.1", "readline": "^1.3.0", "segment": "^0.1.3", "wav": "^1.0.2" diff --git a/samples/js/node/pronunciationAssessment.js b/samples/js/node/pronunciationAssessment.js index 6d95c2a73..9d0d5ff7a 100644 --- a/samples/js/node/pronunciationAssessment.js +++ b/samples/js/node/pronunciationAssessment.js @@ -2,55 +2,49 @@ // Licensed under the MIT license. // pull in the required packages. -var sdk = require("microsoft-cognitiveservices-speech-sdk"); -const _ = require('lodash'); -var fs = require("fs"); +import * as sdk from "microsoft-cognitiveservices-speech-sdk"; +import * as _ from "lodash"; +import * as fs from "fs"; -(function() { - "use strict"; - - // pronunciation assessment with audio file - module.exports = { - main: function(settings) { - var audioConfig = sdk.AudioConfig.fromWavFileInput(fs.readFileSync(settings.filename)); - var speechConfig = sdk.SpeechConfig.fromSubscription(settings.subscriptionKey, settings.serviceRegion); +// pronunciation assessment with audio file +export const main = (settings) => { + var audioConfig = sdk.AudioConfig.fromWavFileInput(fs.readFileSync(settings.filename)); + var speechConfig = sdk.SpeechConfig.fromSubscription(settings.subscriptionKey, settings.serviceRegion); - var reference_text = "What's the weather like?"; - // create pronunciation assessment config, set grading system, granularity and if enable miscue based on your requirement. - const pronunciationAssessmentConfig = new sdk.PronunciationAssessmentConfig( - reference_text, - sdk.PronunciationAssessmentGradingSystem.HundredMark, - sdk.PronunciationAssessmentGranularity.Phoneme, - true - ); + var reference_text = "What's the weather like?"; + // create pronunciation assessment config, set grading system, granularity and if enable miscue based on your requirement. + const pronunciationAssessmentConfig = new sdk.PronunciationAssessmentConfig( + reference_text, + sdk.PronunciationAssessmentGradingSystem.HundredMark, + sdk.PronunciationAssessmentGranularity.Phoneme, + true + ); - // setting the recognition language to English. - speechConfig.speechRecognitionLanguage = settings.language; + // setting the recognition language to English. + speechConfig.speechRecognitionLanguage = settings.language; - // create the speech recognizer. - var reco = new sdk.SpeechRecognizer(speechConfig, audioConfig); - pronunciationAssessmentConfig.applyTo(reco); + // create the speech recognizer. + var reco = new sdk.SpeechRecognizer(speechConfig, audioConfig); + pronunciationAssessmentConfig.applyTo(reco); - function onRecognizedResult(result) { - console.log("pronunciation assessment for: ", result.text); - var pronunciation_result = sdk.PronunciationAssessmentResult.fromResult(result); - console.log(" Accuracy score: ", pronunciation_result.accuracyScore, '\n', - "pronunciation score: ", pronunciation_result.pronunciationScore, '\n', - "completeness score : ", pronunciation_result.completenessScore, '\n', - "fluency score: ", pronunciation_result.fluencyScore - ); - console.log(" Word-level details:"); - _.forEach(pronunciation_result.detailResult.Words, (word, idx) => { - console.log(" ", idx + 1, ": word: ", word.Word, "\taccuracy score: ", word.PronunciationAssessment.AccuracyScore, "\terror type: ", word.PronunciationAssessment.ErrorType, ";"); - }); - reco.close(); - } + function onRecognizedResult(result) { + console.log("pronunciation assessment for: ", result.text); + var pronunciation_result = sdk.PronunciationAssessmentResult.fromResult(result); + console.log(" Accuracy score: ", pronunciation_result.accuracyScore, '\n', + "pronunciation score: ", pronunciation_result.pronunciationScore, '\n', + "completeness score : ", pronunciation_result.completenessScore, '\n', + "fluency score: ", pronunciation_result.fluencyScore + ); + console.log(" Word-level details:"); + _.forEach(pronunciation_result.detailResult.Words, (word, idx) => { + console.log(" ", idx + 1, ": word: ", word.Word, "\taccuracy score: ", word.PronunciationAssessment.AccuracyScore, "\terror type: ", word.PronunciationAssessment.ErrorType, ";"); + }); + reco.close(); + } - reco.recognizeOnceAsync( - function (successfulResult) { - onRecognizedResult(successfulResult); - } - ) + reco.recognizeOnceAsync( + function (successfulResult) { + onRecognizedResult(successfulResult); } - } -}()); + ) +} diff --git a/samples/js/node/pronunciationAssessmentConfiguredWithJson.js b/samples/js/node/pronunciationAssessmentConfiguredWithJson.js index 5a596f9fd..9fb9811ad 100644 --- a/samples/js/node/pronunciationAssessmentConfiguredWithJson.js +++ b/samples/js/node/pronunciationAssessmentConfiguredWithJson.js @@ -2,56 +2,50 @@ // Licensed under the MIT license. // pull in the required packages. -var sdk = require("microsoft-cognitiveservices-speech-sdk"); -const _ = require('lodash'); -var fs = require("fs"); +import * as sdk from "microsoft-cognitiveservices-speech-sdk"; +import * as _ from "lodash"; +import * as fs from "fs"; -(function() { - "use strict"; - - // pronunciation assessment with audio file - module.exports = { - main: function(settings) { - var audioConfig = sdk.AudioConfig.fromWavFileInput(fs.readFileSync(settings.filename)); - var speechConfig = sdk.SpeechConfig.fromSubscription(settings.subscriptionKey, settings.serviceRegion); +// pronunciation assessment with audio file +export const main = (settings) => { + var audioConfig = sdk.AudioConfig.fromWavFileInput(fs.readFileSync(settings.filename)); + var speechConfig = sdk.SpeechConfig.fromSubscription(settings.subscriptionKey, settings.serviceRegion); - var reference_text = "What's the weather like?"; - // create pronunciation assessment config, set grading system, granularity and if enable miscue based on your requirement. - const pronunciationAssessmentConfig = new sdk.PronunciationAssessmentConfig.fromJSON( - "{\"GradingSystem\": \"HundredMark\", \ - \"Granularity\": \"Phoneme\", \ - \"EnableMiscue\": \"True\", \ - \"ScenarioId\": \"[scenario ID will be assigned by product team]\"}" - ); - pronunciationAssessmentConfig.referenceText = reference_text + var reference_text = "What's the weather like?"; + // create pronunciation assessment config, set grading system, granularity and if enable miscue based on your requirement. + const pronunciationAssessmentConfig = new sdk.PronunciationAssessmentConfig.fromJSON( + "{\"GradingSystem\": \"HundredMark\", \ + \"Granularity\": \"Phoneme\", \ + \"EnableMiscue\": \"True\", \ + \"ScenarioId\": \"[scenario ID will be assigned by product team]\"}" + ); + pronunciationAssessmentConfig.referenceText = reference_text - // setting the recognition language to English. - speechConfig.speechRecognitionLanguage = settings.language; + // setting the recognition language to English. + speechConfig.speechRecognitionLanguage = settings.language; - // create the speech recognizer. - var reco = new sdk.SpeechRecognizer(speechConfig, audioConfig); - pronunciationAssessmentConfig.applyTo(reco); + // create the speech recognizer. + var reco = new sdk.SpeechRecognizer(speechConfig, audioConfig); + pronunciationAssessmentConfig.applyTo(reco); - function onRecognizedResult(result) { - console.log("pronunciation assessment for: ", result.text); - var pronunciation_result = sdk.PronunciationAssessmentResult.fromResult(result); - console.log(" Accuracy score: ", pronunciation_result.accuracyScore, '\n', - "pronunciation score: ", pronunciation_result.pronunciationScore, '\n', - "completeness score : ", pronunciation_result.completenessScore, '\n', - "fluency score: ", pronunciation_result.fluencyScore - ); - console.log(" Word-level details:"); - _.forEach(pronunciation_result.detailResult.Words, (word, idx) => { - console.log(" ", idx + 1, ": word: ", word.Word, "\taccuracy score: ", word.PronunciationAssessment.AccuracyScore, "\terror type: ", word.PronunciationAssessment.ErrorType, ";"); - }); - reco.close(); - } + function onRecognizedResult(result) { + console.log("pronunciation assessment for: ", result.text); + var pronunciation_result = sdk.PronunciationAssessmentResult.fromResult(result); + console.log(" Accuracy score: ", pronunciation_result.accuracyScore, '\n', + "pronunciation score: ", pronunciation_result.pronunciationScore, '\n', + "completeness score : ", pronunciation_result.completenessScore, '\n', + "fluency score: ", pronunciation_result.fluencyScore + ); + console.log(" Word-level details:"); + _.forEach(pronunciation_result.detailResult.Words, (word, idx) => { + console.log(" ", idx + 1, ": word: ", word.Word, "\taccuracy score: ", word.PronunciationAssessment.AccuracyScore, "\terror type: ", word.PronunciationAssessment.ErrorType, ";"); + }); + reco.close(); + } - reco.recognizeOnceAsync( - function (successfulResult) { - onRecognizedResult(successfulResult); - } - ) + reco.recognizeOnceAsync( + function (successfulResult) { + onRecognizedResult(successfulResult); } - } -}()); + ) +} diff --git a/samples/js/node/pronunciationAssessmentContinue.js b/samples/js/node/pronunciationAssessmentContinue.js index 025846004..b63d7a96a 100644 --- a/samples/js/node/pronunciationAssessmentContinue.js +++ b/samples/js/node/pronunciationAssessmentContinue.js @@ -2,169 +2,164 @@ // Licensed under the MIT license. // pull in the required packages. -var sdk = require("microsoft-cognitiveservices-speech-sdk"); -const filePushStream = require("./filePushStream"); -const _ = require('lodash') ; -const forEach = require('lodash.foreach'); -const Segment = require('segment'); -var difflib = require('difflib'); +import * as sdk from "microsoft-cognitiveservices-speech-sdk"; +import * as filePushStream from "./filePushStream.js"; +import * as _ from "lodash"; +import * as Segment from "segment"; +import * as difflib from "difflib"; -(function() { -"use strict"; - // pronunciation assessment with audio streaming and continue mode -module.exports = { - main: function(settings) { +export const main = (settings) => { - // now create the audio-config pointing to our stream and - // the speech config specifying the language. - var wavFileHeader = filePushStream.readWavFileHeader(settings.filename); - var format = sdk.AudioStreamFormat.getWaveFormatPCM(wavFileHeader.framerate, wavFileHeader.bitsPerSample, wavFileHeader.nChannels); - var audioStream = filePushStream.openPushStream(settings.filename); - var audioConfig = sdk.AudioConfig.fromStreamInput(audioStream, format); - var speechConfig = sdk.SpeechConfig.fromSubscription(settings.subscriptionKey, settings.serviceRegion); +// now create the audio-config pointing to our stream and +// the speech config specifying the language. +var wavFileHeader = filePushStream.readWavFileHeader(settings.filename); +var format = sdk.AudioStreamFormat.getWaveFormatPCM(wavFileHeader.framerate, wavFileHeader.bitsPerSample, wavFileHeader.nChannels); +var audioStream = filePushStream.openPushStream(settings.filename); +var audioConfig = sdk.AudioConfig.fromStreamInput(audioStream, format); +var speechConfig = sdk.SpeechConfig.fromSubscription(settings.subscriptionKey, settings.serviceRegion); - var reference_text = "What's the weather like?"; - // create pronunciation assessment config, set grading system, granularity and if enable miscue based on your requirement. - const pronunciationAssessmentConfig = new sdk.PronunciationAssessmentConfig( - reference_text, - sdk.PronunciationAssessmentGradingSystem.HundredMark, - sdk.PronunciationAssessmentGranularity.Phoneme, - true - ); +var reference_text = "What's the weather like?"; +// create pronunciation assessment config, set grading system, granularity and if enable miscue based on your requirement. +const pronunciationAssessmentConfig = new sdk.PronunciationAssessmentConfig( + reference_text, + sdk.PronunciationAssessmentGradingSystem.HundredMark, + sdk.PronunciationAssessmentGranularity.Phoneme, + true +); + +// setting the recognition language to English. +speechConfig.speechRecognitionLanguage = settings.language; - // setting the recognition language to English. - speechConfig.speechRecognitionLanguage = settings.language; +// create the speech recognizer. +var reco = new sdk.SpeechRecognizer(speechConfig, audioConfig); +pronunciationAssessmentConfig.applyTo(reco); - // create the speech recognizer. - var reco = new sdk.SpeechRecognizer(speechConfig, audioConfig); - pronunciationAssessmentConfig.applyTo(reco); +const scoreNumber = { + accuracyScore: 0, + fluencyScore: 0, + compScore: 0, +}; +const allWords = []; +var currentText = []; +var startOffset = 0; +var recognizedWords = []; +var fluencyScores = []; +var durations = []; +var jo = {}; - const scoreNumber = { - accuracyScore: 0, - fluencyScore: 0, - compScore: 0, - }; - const allWords = []; - var currentText = []; - var startOffset = 0; - var recognizedWords = []; - var fluencyScores = []; - var durations = []; - var jo = {}; - - // Before beginning speech recognition, setup the callbacks to be invoked when an event occurs. +// Before beginning speech recognition, setup the callbacks to be invoked when an event occurs. - // The event recognizing signals that an intermediate recognition result is received. - // You will receive one or more recognizing events as a speech phrase is recognized, with each containing - // more recognized speech. The event will contain the text for the recognition since the last phrase was recognized. - reco.recognizing = function (s, e) { - var str = "(recognizing) Reason: " + sdk.ResultReason[e.result.reason] + " Text: " + e.result.text; - console.log(str); - }; +// The event recognizing signals that an intermediate recognition result is received. +// You will receive one or more recognizing events as a speech phrase is recognized, with each containing +// more recognized speech. The event will contain the text for the recognition since the last phrase was recognized. +reco.recognizing = function (s, e) { + var str = "(recognizing) Reason: " + sdk.ResultReason[e.result.reason] + " Text: " + e.result.text; + console.log(str); +}; - // The event recognized signals that a final recognition result is received. - // This is the final event that a phrase has been recognized. - // For continuous recognition, you will get one recognized event for each phrase recognized. - reco.recognized = function (s, e) { - console.log("pronunciation assessment for: ", e.result.text); - var pronunciation_result = sdk.PronunciationAssessmentResult.fromResult(e.result); - console.log(" Accuracy score: ", pronunciation_result.accuracyScore, '\n', - "pronunciation score: ", pronunciation_result.pronunciationScore, '\n', - "completeness score : ", pronunciation_result.completenessScore, '\n', - "fluency score: ", pronunciation_result.fluencyScore - ); +// The event recognized signals that a final recognition result is received. +// This is the final event that a phrase has been recognized. +// For continuous recognition, you will get one recognized event for each phrase recognized. +reco.recognized = function (s, e) { + console.log("pronunciation assessment for: ", e.result.text); + var pronunciation_result = sdk.PronunciationAssessmentResult.fromResult(e.result); + console.log(" Accuracy score: ", pronunciation_result.accuracyScore, '\n', + "pronunciation score: ", pronunciation_result.pronunciationScore, '\n', + "completeness score : ", pronunciation_result.completenessScore, '\n', + "fluency score: ", pronunciation_result.fluencyScore + ); - jo = JSON.parse(e.result.properties.getProperty(sdk.PropertyId.SpeechServiceResponse_JsonResult)); - const nb = jo["NBest"][0]; - startOffset = nb.Words[0].Offset; - const localtext = _.map(nb.Words, (item) => item.Word.toLowerCase()); - currentText = currentText.concat(localtext); - fluencyScores.push(nb.PronunciationAssessment.FluencyScore); - const isSucceeded = jo.RecognitionStatus === 'Success'; - const nBestWords = jo.NBest[0].Words; - const durationList = []; - _.forEach(nBestWords, (word) => { - recognizedWords.push(word); - durationList.push(word.Duration); - }); - durations.push(_.sum(durationList)); + jo = JSON.parse(e.result.properties.getProperty(sdk.PropertyId.SpeechServiceResponse_JsonResult)); + const nb = jo["NBest"][0]; + startOffset = nb.Words[0].Offset; + const localtext = _.map(nb.Words, (item) => item.Word.toLowerCase()); + currentText = currentText.concat(localtext); + fluencyScores.push(nb.PronunciationAssessment.FluencyScore); + const isSucceeded = jo.RecognitionStatus === 'Success'; + const nBestWords = jo.NBest[0].Words; + const durationList = []; + _.forEach(nBestWords, (word) => { + recognizedWords.push(word); + durationList.push(word.Duration); + }); + durations.push(_.sum(durationList)); - if (isSucceeded && nBestWords) { - allWords.push(...nBestWords); - } - }; + if (isSucceeded && nBestWords) { + allWords.push(...nBestWords); + } +}; - function calculateOverallPronunciationScore() { - const resText = currentText.join(" "); - let wholelyricsArry = []; - let resTextArray = []; +function calculateOverallPronunciationScore() { + const resText = currentText.join(" "); + let wholelyricsArry = []; + let resTextArray = []; - // The sample code provides only zh-CN and en-US locales - if (["zh-cn"].includes(settings.language.toLowerCase())) { - const resTextProcessed = (resText.toLocaleLowerCase() ?? "").replace(new RegExp("[^a-zA-Z0-9\u4E00-\u9FA5']+", "g"), " "); - const wholelyrics = (reference_text.toLocaleLowerCase() ?? "").replace(new RegExp("[^a-zA-Z0-9\u4E00-\u9FA5']+", "g"), " "); - const segment = new Segment(); - segment.useDefault(); - segment.loadDict('wildcard.txt'); - _.map(segment.doSegment(wholelyrics, {stripPunctuation: true}), (res) => wholelyricsArry.push(res['w'])); - _.map(segment.doSegment(resTextProcessed, {stripPunctuation: true}), (res) => resTextArray.push(res['w'])); - } else { - let resTextProcessed = (resText.toLocaleLowerCase() ?? "").replace(new RegExp("[!\"#$%&()*+,-./:;<=>?@[^_`{|}~]+", "g"), "").replace(new RegExp("]+", "g"), ""); - let wholelyrics = (reference_text.toLocaleLowerCase() ?? "").replace(new RegExp("[!\"#$%&()*+,-./:;<=>?@[^_`{|}~]+", "g"), "").replace(new RegExp("]+", "g"), ""); - wholelyricsArry = wholelyrics.split(" "); - resTextArray = resTextProcessed.split(" "); - } - const wholelyricsArryRes = _.map( - _.filter(wholelyricsArry, (item) => !!item), - (item) => item.trim() - ); - - // For continuous pronunciation assessment mode, the service won't return the words with `Insertion` or `Omission` - // We need to compare with the reference text after received all recognized words to get these error words. - const diff = new difflib.SequenceMatcher(null, wholelyricsArryRes, resTextArray); - const lastWords = []; - for (const d of diff.getOpcodes()) { - if (d[0] == "insert" || d[0] == "replace") { - if (["zh-cn"].includes(settings.language.toLowerCase())) { - for (let j = d[3], count = 0; j < d[4]; count++) { - let len = 0; - let bfind = false; - _.map(allWords, (item, index) => { - if ( - (len == j || - (index + 1 < allWords.length && - allWords[index].Word.length > 1 && - j > len && - j < len + allWords[index + 1].Word.length)) && - !bfind - ) { - const wordNew = _.cloneDeep(allWords[index]); - if ( - allWords && - allWords.length > 0 && - allWords[index].PronunciationAssessment.ErrorType !== "Insertion" - ) { - wordNew.PronunciationAssessment.ErrorType = "Insertion"; - } - lastWords.push(wordNew); - bfind = true; - j += allWords[index].Word.length; - } - len = len + item.Word.length; - }); - } - } else { - for (let j = d[3]; j < d[4]; j++) { - if (allWords && allWords.length > 0 && allWords[j].PronunciationAssessment.ErrorType !== "Insertion") { - allWords[j].PronunciationAssessment.ErrorType = "Insertion"; - } - lastWords.push(allWords[j]); + // The sample code provides only zh-CN and en-US locales + if (["zh-cn"].includes(settings.language.toLowerCase())) { + const resTextProcessed = (resText.toLocaleLowerCase() ?? "").replace(new RegExp("[^a-zA-Z0-9\u4E00-\u9FA5']+", "g"), " "); + const wholelyrics = (reference_text.toLocaleLowerCase() ?? "").replace(new RegExp("[^a-zA-Z0-9\u4E00-\u9FA5']+", "g"), " "); + const segment = new Segment(); + segment.useDefault(); + segment.loadDict('wildcard.txt'); + _.map(segment.doSegment(wholelyrics, {stripPunctuation: true}), (res) => wholelyricsArry.push(res['w'])); + _.map(segment.doSegment(resTextProcessed, {stripPunctuation: true}), (res) => resTextArray.push(res['w'])); + } else { + let resTextProcessed = (resText.toLocaleLowerCase() ?? "").replace(new RegExp("[!\"#$%&()*+,-./:;<=>?@[^_`{|}~]+", "g"), "").replace(new RegExp("]+", "g"), ""); + let wholelyrics = (reference_text.toLocaleLowerCase() ?? "").replace(new RegExp("[!\"#$%&()*+,-./:;<=>?@[^_`{|}~]+", "g"), "").replace(new RegExp("]+", "g"), ""); + wholelyricsArry = wholelyrics.split(" "); + resTextArray = resTextProcessed.split(" "); + } + const wholelyricsArryRes = _.map( + _.filter(wholelyricsArry, (item) => !!item), + (item) => item.trim() + ); + + // For continuous pronunciation assessment mode, the service won't return the words with `Insertion` or `Omission` + // We need to compare with the reference text after received all recognized words to get these error words. + const diff = new difflib.SequenceMatcher(null, wholelyricsArryRes, resTextArray); + const lastWords = []; + for (const d of diff.getOpcodes()) { + if (d[0] == "insert" || d[0] == "replace") { + if (["zh-cn"].includes(settings.language.toLowerCase())) { + for (let j = d[3], count = 0; j < d[4]; count++) { + let len = 0; + let bfind = false; + _.map(allWords, (item, index) => { + if ( + (len == j || + (index + 1 < allWords.length && + allWords[index].Word.length > 1 && + j > len && + j < len + allWords[index + 1].Word.length)) && + !bfind + ) { + const wordNew = _.cloneDeep(allWords[index]); + if ( + allWords && + allWords.length > 0 && + allWords[index].PronunciationAssessment.ErrorType !== "Insertion" + ) { + wordNew.PronunciationAssessment.ErrorType = "Insertion"; + } + lastWords.push(wordNew); + bfind = true; + j += allWords[index].Word.length; } + len = len + item.Word.length; + }); + } + } else { + for (let j = d[3]; j < d[4]; j++) { + if (allWords && allWords.length > 0 && allWords[j].PronunciationAssessment.ErrorType !== "Insertion") { + allWords[j].PronunciationAssessment.ErrorType = "Insertion"; + } + lastWords.push(allWords[j]); } } - if (d[0] == "delete" || d[0] == "replace") { + } + if (d[0] == "delete" || d[0] == "replace") { if ( d[2] == wholelyricsArryRes.length && !( @@ -297,8 +292,4 @@ module.exports = { }; reco.startContinuousRecognitionAsync(); - } - -} -}()); diff --git a/samples/js/node/pronunciationAssessmentFromMicrophone.js b/samples/js/node/pronunciationAssessmentFromMicrophone.js index 0993385ee..e77f29530 100644 --- a/samples/js/node/pronunciationAssessmentFromMicrophone.js +++ b/samples/js/node/pronunciationAssessmentFromMicrophone.js @@ -2,104 +2,98 @@ // Licensed under the MIT license. // pull in the required packages. -var sdk = require("microsoft-cognitiveservices-speech-sdk"); -const MicToSpeech = require("mic-to-speech"); -const wav = require("wav"); -const _ = require('lodash'); -const readline = require('readline'); -const { exit } = require("process"); +import * as sdk from "microsoft-cognitiveservices-speech-sdk"; +import * as MicToSpeech from "mic-to-speech"; +import * as wav from "wav"; +import * as _ from "lodash"; +import * as readline from "readline"; +import { exit } from "process"; -(function() { - "use strict"; - - // pronunciation assessment with audio file - module.exports = { - main: function(settings) { - var speechConfig = sdk.SpeechConfig.fromSubscription(settings.subscriptionKey, settings.serviceRegion); - // setting the recognition language to English. - speechConfig.speechRecognitionLanguage = settings.language; +// pronunciation assessment with audio file +export const main = (settings) => { + var speechConfig = sdk.SpeechConfig.fromSubscription(settings.subscriptionKey, settings.serviceRegion); + // setting the recognition language to English. + speechConfig.speechRecognitionLanguage = settings.language; - function onRecognizedResult(result, reco) { - var pronunciation_result = sdk.PronunciationAssessmentResult.fromResult(result); - console.log(" Accuracy score: ", pronunciation_result.accuracyScore, '\n', - "pronunciation score: ", pronunciation_result.pronunciationScore, '\n', - "completeness score : ", pronunciation_result.completenessScore, '\n', - "fluency score: ", pronunciation_result.fluencyScore - ); - console.log(" Word-level details:"); - _.forEach(pronunciation_result.detailResult.Words, (word, idx) => { - console.log(" ", idx + 1, ": word: ", word.Word, "\taccuracy score: ", word.PronunciationAssessment.AccuracyScore, "\terror type: ", word.PronunciationAssessment.ErrorType, ";"); - }); - reco.close(); - exit(); - } + function onRecognizedResult(result, reco) { + var pronunciation_result = sdk.PronunciationAssessmentResult.fromResult(result); + console.log(" Accuracy score: ", pronunciation_result.accuracyScore, '\n', + "pronunciation score: ", pronunciation_result.pronunciationScore, '\n', + "completeness score : ", pronunciation_result.completenessScore, '\n', + "fluency score: ", pronunciation_result.fluencyScore + ); + console.log(" Word-level details:"); + _.forEach(pronunciation_result.detailResult.Words, (word, idx) => { + console.log(" ", idx + 1, ": word: ", word.Word, "\taccuracy score: ", word.PronunciationAssessment.AccuracyScore, "\terror type: ", word.PronunciationAssessment.ErrorType, ";"); + }); + reco.close(); + exit(); + } - function recognizeFromMic(buffer, reference_text) { - /** Writing WAV Headers into the buffer received. */ - let writer = new wav.Writer({ - sampleRate: 32000, - channels: 1, - bitDepth: 16, - }); - writer.write(buffer); - writer.end(); - writer.on("finish", ()=>{ - /** On finish, read the WAV stream using configuration the SDK provides. */ - let audioConfig = sdk.AudioConfig.fromWavFileInput(writer.read()); + function recognizeFromMic(buffer, reference_text) { + /** Writing WAV Headers into the buffer received. */ + let writer = new wav.Writer({ + sampleRate: 32000, + channels: 1, + bitDepth: 16, + }); + writer.write(buffer); + writer.end(); + writer.on("finish", ()=>{ + /** On finish, read the WAV stream using configuration the SDK provides. */ + let audioConfig = sdk.AudioConfig.fromWavFileInput(writer.read()); - // create pronunciation assessment config, set grading system, granularity and if enable miscue based on your requirement. - const pronunciationAssessmentConfig = new sdk.PronunciationAssessmentConfig( - reference_text, - sdk.PronunciationAssessmentGradingSystem.HundredMark, - sdk.PronunciationAssessmentGranularity.Phoneme, - true - ); + // create pronunciation assessment config, set grading system, granularity and if enable miscue based on your requirement. + const pronunciationAssessmentConfig = new sdk.PronunciationAssessmentConfig( + reference_text, + sdk.PronunciationAssessmentGradingSystem.HundredMark, + sdk.PronunciationAssessmentGranularity.Phoneme, + true + ); - // create the speech recognizer. - var reco = new sdk.SpeechRecognizer(speechConfig, audioConfig); - pronunciationAssessmentConfig.applyTo(reco); + // create the speech recognizer. + var reco = new sdk.SpeechRecognizer(speechConfig, audioConfig); + pronunciationAssessmentConfig.applyTo(reco); - reco.recognizeOnceAsync( - function (successfulResult) { - onRecognizedResult(successfulResult, reco); - }, - function (err) { - console.log("ERROR: " + err); - exit(); - } - ); - }); - } + reco.recognizeOnceAsync( + function (successfulResult) { + onRecognizedResult(successfulResult, reco); + }, + function (err) { + console.log("ERROR: " + err); + exit(); + } + ); + }); + } - try { - const inputProcess = readline.createInterface({ - input: process.stdin, - output: process.stdout + try { + const inputProcess = readline.createInterface({ + input: process.stdin, + output: process.stdout + }); + inputProcess.question("Enter reference text you want to assess, or enter empty text to exit. \n> ", (inputText) => { + if (!inputText) { + exit(); + } else { + /** Configuration for the mic */ + let micToSpeech = new MicToSpeech({ + channels: 1, }); - inputProcess.question("Enter reference text you want to assess, or enter empty text to exit. \n> ", (inputText) => { - if (!inputText) { - exit(); - } else { - /** Configuration for the mic */ - let micToSpeech = new MicToSpeech({ - channels: 1, - }); - /** Receive the audio buffer from mic */ - micToSpeech.on("speech", function (buffer) { - console.log("buffer of speech received"); - recognizeFromMic(buffer, inputText); - }); - /** Start listening to speech. */ - micToSpeech.start(); - console.log("Listening for speech"); - } - inputProcess.close(); + /** Receive the audio buffer from mic */ + micToSpeech.on("speech", function (buffer) { + console.log("buffer of speech received"); + recognizeFromMic(buffer, inputText); }); - } catch (e) { - console.log("ERROR:", e); - exit(); + /** Start listening to speech. */ + micToSpeech.start(); + console.log("Listening for speech"); } - } + inputProcess.close(); + }); + } catch (e) { + console.log("ERROR:", e); + exit(); } -}()); +} diff --git a/samples/js/node/settings.js b/samples/js/node/settings.js index cd42b5087..c1092411e 100644 --- a/samples/js/node/settings.js +++ b/samples/js/node/settings.js @@ -1,25 +1,18 @@ // Copyright (c) Microsoft Corporation. All rights reserved. // Licensed under the MIT license. -(function() { -"use strict"; - - module.exports = { - - // Replace with your own subscription key, service region (e.g., "westus"), - // and recognition language. - subscriptionKey: "YourSubscriptionKey", - serviceRegion: "YourServiceRegion", // e.g., "westus" - language: "en-US", - - // Replace with the full path to a wav file you want to recognize or overwrite. - filename: "YourAudioFile.wav", // 16000 Hz, Mono - - // Replace with your own Language Understanding subscription key (endpoint - // key), region, and app ID in case you want to run the intent sample. - luSubscriptionKey: "YourLanguageUnderstandingSubscriptionKey", - luServiceRegion: "YourLanguageUnderstandingServiceRegion", - luAppId: "YourLanguageUnderstandingAppId", - }; - }()); - +// Replace with your own subscription key, service region (e.g., "westus"), +// and recognition language. +export const subscriptionKey = "YourSubscriptionKey"; +export const serviceRegion = "YourSubscriptionRegion"; // e.g., "westus" +export const language = "en-US"; + +// Replace with the full path to a wav file you want to recognize or overwrite. +export const filename = "YourAudioFile.wav"; // 16000 Hz, Mono + +// Replace with your own Language Understanding subscription key (endpoint +// key), region, and app ID in case you want to run the intent sample. +export const luSubscriptionKey = "YourLanguageUnderstandingSubscriptionKey"; +export const luServiceRegion = "YourLanguageUnderstandingServiceRegion"; +export const luAppId = "YourLanguageUnderstandingAppId"; + diff --git a/samples/js/node/speech.js b/samples/js/node/speech.js index f5e340419..e6851921f 100644 --- a/samples/js/node/speech.js +++ b/samples/js/node/speech.js @@ -2,101 +2,95 @@ // Licensed under the MIT license. // pull in the required packages. -var sdk = require("microsoft-cognitiveservices-speech-sdk"); -const filePushStream = require("./filePushStream"); - -(function() { -"use strict"; - -module.exports = { - main: function(settings) { - - // now create the audio-config pointing to our stream and - // the speech config specifying the language. - - var audioStream = filePushStream.openPushStream(settings.filename); - var audioConfig = sdk.AudioConfig.fromStreamInput(audioStream); - var speechConfig = sdk.SpeechConfig.fromSubscription(settings.subscriptionKey, settings.serviceRegion); - - // setting the recognition language to English. - speechConfig.speechRecognitionLanguage = settings.language; - - // create the speech recognizer. - var reco = new sdk.SpeechRecognizer(speechConfig, audioConfig); - - // Before beginning speech recognition, setup the callbacks to be invoked when an event occurs. - - // The event recognizing signals that an intermediate recognition result is received. - // You will receive one or more recognizing events as a speech phrase is recognized, with each containing - // more recognized speech. The event will contain the text for the recognition since the last phrase was recognized. - reco.recognizing = function (s, e) { - var str = "(recognizing) Reason: " + sdk.ResultReason[e.result.reason] + " Text: " + e.result.text; - console.log(str); - }; - - // The event recognized signals that a final recognition result is received. - // This is the final event that a phrase has been recognized. - // For continuous recognition, you will get one recognized event for each phrase recognized. - reco.recognized = function (s, e) { - // Indicates that recognizable speech was not detected, and that recognition is done. - if (e.result.reason === sdk.ResultReason.NoMatch) { - var noMatchDetail = sdk.NoMatchDetails.fromResult(e.result); - console.log("\r\n(recognized) Reason: " + sdk.ResultReason[e.result.reason] + " NoMatchReason: " + sdk.NoMatchReason[noMatchDetail.reason]); - } else { - console.log("\r\n(recognized) Reason: " + sdk.ResultReason[e.result.reason] + " Text: " + e.result.text); - } - }; +import * as sdk from "microsoft-cognitiveservices-speech-sdk"; +import * as filePushStream from "./filePushStream.js"; - // The event signals that the service has stopped processing speech. - // https://docs.microsoft.com/javascript/api/microsoft-cognitiveservices-speech-sdk/speechrecognitioncanceledeventargs?view=azure-node-latest - // This can happen for two broad classes of reasons. - // 1. An error is encountered. - // In this case the .errorDetails property will contain a textual representation of the error. - // 2. Speech was detected to have ended. - // This can be caused by the end of the specified file being reached, or ~20 seconds of silence from a microphone input. - reco.canceled = function (s, e) { - var str = "(cancel) Reason: " + sdk.CancellationReason[e.reason]; - if (e.reason === sdk.CancellationReason.Error) { - str += ": " + e.errorDetails; - } - console.log(str); - }; - - // Signals that a new session has started with the speech service - reco.sessionStarted = function (s, e) { - var str = "(sessionStarted) SessionId: " + e.sessionId; - console.log(str); - }; +export const main = (settings) => { - // Signals the end of a session with the speech service. - reco.sessionStopped = function (s, e) { - var str = "(sessionStopped) SessionId: " + e.sessionId; - console.log(str); - }; + // now create the audio-config pointing to our stream and + // the speech config specifying the language. + + var audioStream = filePushStream.openPushStream(settings.filename); + var audioConfig = sdk.AudioConfig.fromStreamInput(audioStream); + var speechConfig = sdk.SpeechConfig.fromSubscription(settings.subscriptionKey, settings.serviceRegion); - // Signals that the speech service has started to detect speech. - reco.speechStartDetected = function (s, e) { - var str = "(speechStartDetected) SessionId: " + e.sessionId; - console.log(str); + // setting the recognition language to English. + speechConfig.speechRecognitionLanguage = settings.language; + + // create the speech recognizer. + var reco = new sdk.SpeechRecognizer(speechConfig, audioConfig); + + // Before beginning speech recognition, setup the callbacks to be invoked when an event occurs. + + // The event recognizing signals that an intermediate recognition result is received. + // You will receive one or more recognizing events as a speech phrase is recognized, with each containing + // more recognized speech. The event will contain the text for the recognition since the last phrase was recognized. + reco.recognizing = function (s, e) { + var str = "(recognizing) Reason: " + sdk.ResultReason[e.result.reason] + " Text: " + e.result.text; + console.log(str); }; - // Signals that the speech service has detected that speech has stopped. - reco.speechEndDetected = function (s, e) { - var str = "(speechEndDetected) SessionId: " + e.sessionId; - console.log(str); +// The event recognized signals that a final recognition result is received. +// This is the final event that a phrase has been recognized. +// For continuous recognition, you will get one recognized event for each phrase recognized. +reco.recognized = function (s, e) { + // Indicates that recognizable speech was not detected, and that recognition is done. + if (e.result.reason === sdk.ResultReason.NoMatch) { + var noMatchDetail = sdk.NoMatchDetails.fromResult(e.result); + console.log("\r\n(recognized) Reason: " + sdk.ResultReason[e.result.reason] + " NoMatchReason: " + sdk.NoMatchReason[noMatchDetail.reason]); + } else { + console.log("\r\n(recognized) Reason: " + sdk.ResultReason[e.result.reason] + " Text: " + e.result.text); + } +}; + +// The event signals that the service has stopped processing speech. +// https://docs.microsoft.com/javascript/api/microsoft-cognitiveservices-speech-sdk/speechrecognitioncanceledeventargs?view=azure-node-latest +// This can happen for two broad classes of reasons. +// 1. An error is encountered. +// In this case the .errorDetails property will contain a textual representation of the error. +// 2. Speech was detected to have ended. +// This can be caused by the end of the specified file being reached, or ~20 seconds of silence from a microphone input. +reco.canceled = function (s, e) { + var str = "(cancel) Reason: " + sdk.CancellationReason[e.reason]; + if (e.reason === sdk.CancellationReason.Error) { + str += ": " + e.errorDetails; + } + console.log(str); }; - // start the recognizer and wait for a result. - reco.recognizeOnceAsync( - function (result) { - reco.close(); - reco = undefined; - }, - function (err) { - reco.close(); - reco = undefined; - }); - } - -} -}()); +// Signals that a new session has started with the speech service +reco.sessionStarted = function (s, e) { + var str = "(sessionStarted) SessionId: " + e.sessionId; + console.log(str); +}; + +// Signals the end of a session with the speech service. +reco.sessionStopped = function (s, e) { + var str = "(sessionStopped) SessionId: " + e.sessionId; + console.log(str); +}; + +// Signals that the speech service has started to detect speech. +reco.speechStartDetected = function (s, e) { + var str = "(speechStartDetected) SessionId: " + e.sessionId; + console.log(str); +}; + +// Signals that the speech service has detected that speech has stopped. +reco.speechEndDetected = function (s, e) { + var str = "(speechEndDetected) SessionId: " + e.sessionId; + console.log(str); +}; + + // start the recognizer and wait for a result. + reco.recognizeOnceAsync( + function (result) { + reco.close(); + reco = undefined; + }, + function (err) { + reco.close(); + reco = undefined; + }); +}; + diff --git a/samples/js/node/synthesis.js b/samples/js/node/synthesis.js index 787a767c9..05378a4c3 100644 --- a/samples/js/node/synthesis.js +++ b/samples/js/node/synthesis.js @@ -2,92 +2,86 @@ // Licensed under the MIT license. // pull in the required packages. -var sdk = require("microsoft-cognitiveservices-speech-sdk"); -var readline = require("readline"); +import * as sdk from "microsoft-cognitiveservices-speech-sdk"; +import * as readline from "readline"; -(function() { -"use strict"; +export const main = (settings, filename) => { -module.exports = { - main: function(settings, filename) { + // now create the audio-config pointing to the output file. + // You can also use audio output stream to initialize the audio config, see the docs for details. + var audioConfig = sdk.AudioConfig.fromAudioFileOutput(filename); + var speechConfig = sdk.SpeechConfig.fromSubscription(settings.subscriptionKey, settings.serviceRegion); - // now create the audio-config pointing to the output file. - // You can also use audio output stream to initialize the audio config, see the docs for details. - var audioConfig = sdk.AudioConfig.fromAudioFileOutput(filename); - var speechConfig = sdk.SpeechConfig.fromSubscription(settings.subscriptionKey, settings.serviceRegion); + // setting the synthesis language, voice name, and output audio format. + // see https://aka.ms/speech/tts-languages for available languages and voices + speechConfig.speechSynthesisLanguage = settings.language; + speechConfig.speechSynthesisVoiceName = "en-US-JennyNeural"; + speechConfig.speechSynthesisOutputFormat = sdk.SpeechSynthesisOutputFormat.Audio16Khz32KBitRateMonoMp3; - // setting the synthesis language, voice name, and output audio format. - // see https://aka.ms/speech/tts-languages for available languages and voices - speechConfig.speechSynthesisLanguage = settings.language; - speechConfig.speechSynthesisVoiceName = "en-US-JennyNeural"; - speechConfig.speechSynthesisOutputFormat = sdk.SpeechSynthesisOutputFormat.Audio16Khz32KBitRateMonoMp3; + var rl = readline.createInterface({ + input: process.stdin, + output: process.stdout + }); - var rl = readline.createInterface({ - input: process.stdin, - output: process.stdout - }); + // create the speech synthesizer. + var synthesizer = new sdk.SpeechSynthesizer(speechConfig, audioConfig); - // create the speech synthesizer. - var synthesizer = new sdk.SpeechSynthesizer(speechConfig, audioConfig); + // Before beginning speech synthesis, setup the callbacks to be invoked when an event occurs. - // Before beginning speech synthesis, setup the callbacks to be invoked when an event occurs. + // The event synthesizing signals that a synthesized audio chunk is received. + // You will receive one or more synthesizing events as a speech phrase is synthesized. + // You can use this callback to streaming receive the synthesized audio. + synthesizer.synthesizing = function (s, e) { + var str = "(synthesizing) Reason: " + sdk.ResultReason[e.result.reason] + " Audio chunk length: " + e.result.audioData.byteLength; + console.log(str); + }; - // The event synthesizing signals that a synthesized audio chunk is received. - // You will receive one or more synthesizing events as a speech phrase is synthesized. - // You can use this callback to streaming receive the synthesized audio. - synthesizer.synthesizing = function (s, e) { - var str = "(synthesizing) Reason: " + sdk.ResultReason[e.result.reason] + " Audio chunk length: " + e.result.audioData.byteLength; - console.log(str); - }; - - // The event visemeReceived signals that a viseme is detected. - // a viseme is the visual description of a phoneme in spoken language. It defines the position of the face and mouth when speaking a word. - synthesizer.visemeReceived = function(s, e) { - var str = "(viseme) : Viseme event received. Audio offset: " + (e.audioOffset / 10000) + "ms, viseme id: " + e.visemeId; - console.log(str); - } - - // The event synthesis completed signals that the synthesis is completed. - synthesizer.synthesisCompleted = function (s, e) { - console.log("(synthesized) Reason: " + sdk.ResultReason[e.result.reason] + " Audio length: " + e.result.audioData.byteLength); - }; + // The event visemeReceived signals that a viseme is detected. + // a viseme is the visual description of a phoneme in spoken language. It defines the position of the face and mouth when speaking a word. + synthesizer.visemeReceived = function(s, e) { + var str = "(viseme) : Viseme event received. Audio offset: " + (e.audioOffset / 10000) + "ms, viseme id: " + e.visemeId; + console.log(str); + } + + // The event synthesis completed signals that the synthesis is completed. + synthesizer.synthesisCompleted = function (s, e) { + console.log("(synthesized) Reason: " + sdk.ResultReason[e.result.reason] + " Audio length: " + e.result.audioData.byteLength); + }; - // The synthesis started event signals that the synthesis is started. - synthesizer.synthesisStarted = function (s, e) { - console.log("(synthesis started)"); - }; + // The synthesis started event signals that the synthesis is started. + synthesizer.synthesisStarted = function (s, e) { + console.log("(synthesis started)"); + }; - // The event signals that the service has stopped processing speech. - // This can happen when an error is encountered. - synthesizer.SynthesisCanceled = function (s, e) { - var cancellationDetails = sdk.CancellationDetails.fromResult(e.result); - var str = "(cancel) Reason: " + sdk.CancellationReason[cancellationDetails.reason]; - if (cancellationDetails.reason === sdk.CancellationReason.Error) { - str += ": " + e.result.errorDetails; - } - console.log(str); - }; + // The event signals that the service has stopped processing speech. + // This can happen when an error is encountered. + synthesizer.SynthesisCanceled = function (s, e) { + var cancellationDetails = sdk.CancellationDetails.fromResult(e.result); + var str = "(cancel) Reason: " + sdk.CancellationReason[cancellationDetails.reason]; + if (cancellationDetails.reason === sdk.CancellationReason.Error) { + str += ": " + e.result.errorDetails; + } + console.log(str); + }; - // This event signals that word boundary is received. This indicates the audio boundary of each word. - // The unit of e.audioOffset is tick (1 tick = 100 nanoseconds), divide by 10,000 to convert to milliseconds. - synthesizer.wordBoundary = function (s, e) { - console.log("(WordBoundary), Text: " + e.text + ", Audio offset: " + e.audioOffset / 10000 + "ms."); - }; + // This event signals that word boundary is received. This indicates the audio boundary of each word. + // The unit of e.audioOffset is tick (1 tick = 100 nanoseconds), divide by 10,000 to convert to milliseconds. + synthesizer.wordBoundary = function (s, e) { + console.log("(WordBoundary), Text: " + e.text + ", Audio offset: " + e.audioOffset / 10000 + "ms."); + }; - rl.question("Type some text that you want to speak...\n> ", function (text) { - rl.close(); - // start the synthesizer and wait for a result. - synthesizer.speakTextAsync(text, - function (result) { - synthesizer.close(); - synthesizer = undefined; - }, - function (err) { - console.trace("err - " + err); - synthesizer.close(); - synthesizer = undefined; - }) - }); - } + rl.question("Type some text that you want to speak...\n> ", function (text) { + rl.close(); + // start the synthesizer and wait for a result. + synthesizer.speakTextAsync(text, + function (result) { + synthesizer.close(); + synthesizer = undefined; + }, + function (err) { + console.trace("err - " + err); + synthesizer.close(); + synthesizer = undefined; + }) + }); } -}()); diff --git a/samples/js/node/translation.js b/samples/js/node/translation.js index 4b3afce0a..e210e7600 100644 --- a/samples/js/node/translation.js +++ b/samples/js/node/translation.js @@ -2,69 +2,63 @@ // Licensed under the MIT license. // pull in the required packages. -var sdk = require("microsoft-cognitiveservices-speech-sdk"); +import * as sdk from "microsoft-cognitiveservices-speech-sdk"; -(function() { -"use strict"; +export const main = (settings) => { -module.exports = { - main: function(settings) { + // now create the audio-config pointing to our stream and + // the speech config specifying the language. + var audioStream = filePushStream.openPushStream(settings.filename); + var audioConfig = sdk.AudioConfig.fromStreamInput(audioStream); + var translationConfig = sdk.SpeechTranslationConfig.fromSubscription(settings.subscriptionKey, settings.serviceRegion); - // now create the audio-config pointing to our stream and - // the speech config specifying the language. - var audioStream = filePushStream.openPushStream(settings.filename); - var audioConfig = sdk.AudioConfig.fromStreamInput(audioStream); - var translationConfig = sdk.SpeechTranslationConfig.fromSubscription(settings.subscriptionKey, settings.serviceRegion); + // setting the recognition language to English. + translationConfig.speechRecognitionLanguage = settings.language; - // setting the recognition language to English. - translationConfig.speechRecognitionLanguage = settings.language; + // target language is German. + translationConfig.addTargetLanguage("de-DE"); - // target language is German. - translationConfig.addTargetLanguage("de-DE"); + // create the translation recognizer. + var recognizer = new sdk.TranslationRecognizer(translationConfig, audioConfig); - // create the translation recognizer. - var recognizer = new sdk.TranslationRecognizer(translationConfig, audioConfig); + // Before beginning speech recognition, setup the callbacks to be invoked when an event occurs. - // Before beginning speech recognition, setup the callbacks to be invoked when an event occurs. + // The event recognizing signals that an intermediate recognition result is received. + // You will receive one or more recognizing events as a speech phrase is recognized, with each containing + // more recognized speech. The event will contain the text for the recognition since the last phrase was recognized. + // Both the source language text and the translation text(s) are available. + recognizer.recognizing = function (s, e) { + var str = ("(recognizing) Reason: " + sdk.ResultReason[e.result.reason] + " Text: " + e.result.text + " Translations:"); - // The event recognizing signals that an intermediate recognition result is received. - // You will receive one or more recognizing events as a speech phrase is recognized, with each containing - // more recognized speech. The event will contain the text for the recognition since the last phrase was recognized. - // Both the source language text and the translation text(s) are available. - recognizer.recognizing = function (s, e) { - var str = ("(recognizing) Reason: " + sdk.ResultReason[e.result.reason] + " Text: " + e.result.text + " Translations:"); + var language = "de"; + str += " [" + language + "] " + e.result.translations.get(language); - var language = "de"; - str += " [" + language + "] " + e.result.translations.get(language); + console.log(str); + }; - console.log(str); - }; + // The event recognized signals that a final recognition result is received. + // This is the final event that a phrase has been recognized. + // For continuous recognition, you will get one recognized event for each phrase recognized. + // Both the source language text and the translation text(s) are available. + recognizer.recognized = function (s, e) { + var str = "\r\n(recognized) Reason: " + sdk.ResultReason[e.result.reason] + " Text: " + e.result.text + " Translations:"; - // The event recognized signals that a final recognition result is received. - // This is the final event that a phrase has been recognized. - // For continuous recognition, you will get one recognized event for each phrase recognized. - // Both the source language text and the translation text(s) are available. - recognizer.recognized = function (s, e) { - var str = "\r\n(recognized) Reason: " + sdk.ResultReason[e.result.reason] + " Text: " + e.result.text + " Translations:"; + var language = "de"; + str += " [" + language + "] " + e.result.translations.get(language); + str += "\r\n"; - var language = "de"; - str += " [" + language + "] " + e.result.translations.get(language); - str += "\r\n"; + console.log(str); + }; + + // start the recognizer and wait for a result. + recognizer.recognizeOnceAsync( + function (result) { + recognizer.close(); + recognizer = undefined; + }, + function (err) { + recognizer.close(); + recognizer = undefined; + }); +}; - console.log(str); - }; - - // start the recognizer and wait for a result. - recognizer.recognizeOnceAsync( - function (result) { - recognizer.close(); - recognizer = undefined; - }, - function (err) { - recognizer.close(); - recognizer = undefined; - }); - } - -} -}());