Adding PA content score functionality for cpp/swift/objc/java languages (#2151)
Azure-Samples · Nov 24, 2023 · 6e09ee4 · 6e09ee4
1 parent b6f037e
commit 6e09ee4
Show file tree

Hide file tree

Showing 13 changed files with 505 additions and 29 deletions.
diff --git a/samples/cpp/windows/console/samples/main.cpp b/samples/cpp/windows/console/samples/main.cpp
@@ -18,6 +18,7 @@ extern void KeywordTriggeredSpeechRecognitionWithMicrophone();
 extern void PronunciationAssessmentWithMicrophone();
 extern void PronunciationAssessmentWithStream();
 extern void PronunciationAssessmentConfiguredWithJson();
+extern void PronunciationAssessmentWithContentAssessment();
 extern void SpeechContinuousRecognitionFromDefaultMicrophoneWithMASEnabled();
 extern void SpeechRecognitionFromMicrophoneWithMASEnabledAndPresetGeometrySpecified();
 extern void SpeechContinuousRecognitionFromMultiChannelFileWithMASEnabledAndCustomGeometrySpecified();
@@ -109,6 +110,7 @@ void SpeechSamples()
                 "    beam-forming angles specified.\n";
         cout << "e.) Pronunciation assessment with stream.\n";
         cout << "f.) Pronunciation assessment configured with json.\n";
+        cout << "g.) Pronunciation assessment with content assessment.\n";
         cout << "\nChoice (0 for MAIN MENU): ";
         cout.flush();
 
@@ -168,6 +170,10 @@ void SpeechSamples()
         case 'f':
             PronunciationAssessmentConfiguredWithJson();
             break;
+        case 'G':
+        case 'g':
+            PronunciationAssessmentWithContentAssessment();
+            break;
         case '0':
             break;
         }

diff --git a/samples/cpp/windows/console/samples/pronunciation_assessment_fall.wav b/samples/cpp/windows/console/samples/pronunciation_assessment_fall.wav
diff --git a/samples/cpp/windows/console/samples/samples.vcxproj b/samples/cpp/windows/console/samples/samples.vcxproj
@@ -190,7 +190,7 @@
     <None Include="katiesteve_mono.wav">
       <CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
       <DeploymentContent>true</DeploymentContent>
-    </None>    
+    </None>
     <None Include="enrollment_audio_katie.wav">
       <CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
       <DeploymentContent>true</DeploymentContent>
@@ -204,6 +204,12 @@
       <DeploymentContent>true</DeploymentContent>
     </None>
   </ItemGroup>
+  <ItemGroup>
+    <CopyFileToFolders Include="pronunciation_assessment_fall.wav">
+      <DeploymentContent>true</DeploymentContent>
+      <FileType>Document</FileType>
+    </CopyFileToFolders>
+  </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <Target Name="EnsureNuGetPackageBuildImports" BeforeTargets="PrepareForBuild">
     <PropertyGroup>
@@ -213,4 +219,4 @@
     <Error Condition="!Exists('..\packages\Microsoft.CognitiveServices.Speech.Extension.MAS.1.33.0\build\native\Microsoft.CognitiveServices.Speech.Extension.MAS.targets')" Text="$([System.String]::Format('$(ErrorText)', '..\packages\Microsoft.CognitiveServices.Speech.Extension.MAS.1.33.0\build\native\Microsoft.CognitiveServices.Speech.Extension.MAS.targets'))" />
     <Error Condition="!Exists('..\packages\nlohmann.json.3.10.4\build\native\nlohmann.json.targets')" Text="$([System.String]::Format('$(ErrorText)', '..\packages\nlohmann.json.3.10.4\build\native\nlohmann.json.targets'))" />
   </Target>
-</Project>
+</Project>
diff --git a/samples/cpp/windows/console/samples/samples.vcxproj.filters b/samples/cpp/windows/console/samples/samples.vcxproj.filters
@@ -56,6 +56,9 @@
     <ClCompile Include="diagnostics_logging_samples.cpp">
       <Filter>Source Files</Filter>
     </ClCompile>
+    <ClCompile Include="meeting_transcriber_samples.cpp">
+      <Filter>Source Files</Filter>
+    </ClCompile>
   </ItemGroup>
   <ItemGroup>
     <None Include="whatstheweatherlike.wav">
@@ -74,5 +77,13 @@
       <Filter>Resource Files</Filter>
     </None>
     <None Include="packages.config" />
+    <None Include="katiesteve_mono.wav">
+      <Filter>Resource Files</Filter>
+    </None>
+  </ItemGroup>
+  <ItemGroup>
+    <CopyFileToFolders Include="pronunciation_assessment_fall.wav">
+      <Filter>Resource Files</Filter>
+    </CopyFileToFolders>
   </ItemGroup>
 </Project>
diff --git a/samples/cpp/windows/console/samples/speech_recognition_samples.cpp b/samples/cpp/windows/console/samples/speech_recognition_samples.cpp
@@ -558,11 +558,13 @@ void PronunciationAssessmentWithMicrophone()
     config->SetProperty(PropertyId::SpeechServiceConnection_EndSilenceTimeoutMs, "3000");
 
     std::string referenceText = "";
-    // create pronunciation assessment config, set grading system, granularity and if enable miscue based on your requirement.
+    // Create pronunciation assessment config, set grading system, granularity and if enable miscue based on your requirement.
     auto pronunciationConfig = PronunciationAssessmentConfig::Create(referenceText,
         PronunciationAssessmentGradingSystem::HundredMark,
         PronunciationAssessmentGranularity::Phoneme, true);
 
+    pronunciationConfig->EnableProsodyAssessment();
+
     // Creates a speech recognizer using microphone as audio input.
     auto recognizer = SpeechRecognizer::FromConfig(config, "en-US");
 
@@ -594,8 +596,8 @@ void PronunciationAssessmentWithMicrophone()
 
             auto pronunciationResult = PronunciationAssessmentResult::FromResult(result);
 
-            cout << "    Accuracy score: " << pronunciationResult->AccuracyScore << ", Pronunciation score: "
-                 << pronunciationResult->PronunciationScore << ", Completeness score : " << pronunciationResult->CompletenessScore
+            cout << "    Accuracy score: " << pronunciationResult->AccuracyScore << ", Prosody Score: " << pronunciationResult->ProsodyScore << ", Pronunciation score: "
+                 << pronunciationResult->PronunciationScore << ", Completeness score: " << pronunciationResult->CompletenessScore
                  << ", FluencyScore: " << pronunciationResult->FluencyScore << endl;
         }
         else if (result->Reason == ResultReason::NoMatch)
@@ -625,8 +627,11 @@ void PronunciationAssessmentWithStreamInternalAsync(shared_ptr<SpeechConfig> spe
     // Specify the language used for Pronunciation Assessment
     auto speechRecognizer = SpeechRecognizer::FromConfig(speechConfig, "en-US", audioConfig);
 
-    // create pronunciation assessment config, set grading system, granularity and if enable miscue based on your requirement.
+    // Create pronunciation assessment config, set grading system, granularity and if enable miscue based on your requirement.
     auto pronAssessmentConfig = PronunciationAssessmentConfig::Create(referenceText, PronunciationAssessmentGradingSystem::HundredMark, PronunciationAssessmentGranularity::Phoneme, false);
+
+    pronAssessmentConfig->EnableProsodyAssessment();
+
     pronAssessmentConfig->ApplyTo(speechRecognizer);
 
     audioInputStream->Write(audioData.data(), static_cast<uint32_t>(audioData.size()));
@@ -642,7 +647,7 @@ void PronunciationAssessmentWithStreamInternalAsync(shared_ptr<SpeechConfig> spe
     {
         auto responsePA = PronunciationAssessmentResult::FromResult(result);
         std::string responseResult = "PRONUNCIATION ASSESSMENT RESULTS : \n";
-        responseResult = responseResult + "  Accuracy score: " + std::to_string(responsePA->AccuracyScore) + ", Pronunciation score: " + std::to_string(responsePA->PronunciationScore) + ", Completeness score : " + std::to_string(responsePA->CompletenessScore) + ", FluencyScore: " + std::to_string(responsePA->FluencyScore);
+        responseResult = responseResult + "  Accuracy score: " + std::to_string(responsePA->AccuracyScore) + "  Prosody score: " + std::to_string(responsePA->ProsodyScore) + ", Pronunciation score: " + std::to_string(responsePA->PronunciationScore) + ", Completeness score : " + std::to_string(responsePA->CompletenessScore) + ", FluencyScore: " + std::to_string(responsePA->FluencyScore);
 
         resultContainer.push_back(responseResult);
     }
@@ -697,11 +702,13 @@ void PronunciationAssessmentConfiguredWithJson()
 
     std::string referenceText = "what's the weather like";
 
-    // create pronunciation assessment config, set grading system, granularity and if enable miscue based on your requirement.
+    // Create pronunciation assessment config, set grading system, granularity and if enable miscue based on your requirement.
     std::string json_config = "{\"GradingSystem\":\"HundredMark\",\"Granularity\":\"Phoneme\",\"EnableMiscue\":true, \"ScenarioId\":\"[scenario ID will be assigned by product team]\"}";
     auto pronunciationConfig = PronunciationAssessmentConfig::CreateFromJson(json_config);
     pronunciationConfig->SetReferenceText(referenceText);
 
+    pronunciationConfig->EnableProsodyAssessment();
+
     // Creates a speech recognizer.
     auto recognizer = SpeechRecognizer::FromConfig(config, "en-US", audioConfig);
 
@@ -719,7 +726,7 @@ void PronunciationAssessmentConfiguredWithJson()
 
         auto pronunciationResult = PronunciationAssessmentResult::FromResult(result);
 
-        cout << "    Accuracy score: " << pronunciationResult->AccuracyScore << ", Pronunciation score: "
+        cout << "    Accuracy score: " << pronunciationResult->AccuracyScore << ", Prosody Score: " << pronunciationResult->ProsodyScore << ", Pronunciation score: "
             << pronunciationResult->PronunciationScore << ", Completeness score : " << pronunciationResult->CompletenessScore
             << ", FluencyScore: " << pronunciationResult->FluencyScore << endl;
     }
@@ -741,6 +748,92 @@ void PronunciationAssessmentConfiguredWithJson()
     }
 }
 
+// Pronunciation assessment with content assessment.
+//
+// Runs continuous recognition over "pronunciation_assessment_fall.wav" with prosody
+// assessment and topic-based content assessment enabled, then prints the recognized
+// text followed by the grammar / vocabulary / topic scores taken from the most
+// recently recognized result.
+void PronunciationAssessmentWithContentAssessment()
+{
+    // Creates an instance of a speech config with specified subscription key and service region.
+    // Replace with your own subscription key and service region (e.g., "westus").
+    auto config = SpeechConfig::FromSubscription("YourSubscriptionKey", "YourServiceRegion");
+
+    // Creates an audio config that reads its input from the sample wav file.
+    auto audioConfig = AudioConfig::FromWavFileInput("pronunciation_assessment_fall.wav");
+
+    std::string theTopic = "the season of the fall";
+
+    // Create pronunciation assessment config, set grading system, granularity and if enable miscue based on your requirement.
+    // No reference text is supplied here; the content is assessed against the topic instead.
+    auto pronunciationConfig = PronunciationAssessmentConfig::Create("", PronunciationAssessmentGradingSystem::HundredMark, PronunciationAssessmentGranularity::Phoneme, false);
+
+    pronunciationConfig->EnableProsodyAssessment();
+    pronunciationConfig->EnableContentAssessmentWithTopic(theTopic);
+
+    // Creates a speech recognizer.
+    auto recognizer = SpeechRecognizer::FromConfig(config, "en-US", audioConfig);
+
+    pronunciationConfig->ApplyTo(recognizer);
+
+    vector<string> recognizedTexts;
+    std::shared_ptr<PronunciationContentAssessmentResult> contentResult;
+    promise<void> recognitionEnd;
+
+    // Both the SessionStopped handler and the Canceled(Error) branch signal the end of
+    // recognition. A promise can only be satisfied once: a second set_value() throws
+    // std::future_error, which would otherwise escape from inside an SDK callback.
+    // Route both handlers through this helper so the second signal is a harmless no-op.
+    auto signalRecognitionEnd = [&recognitionEnd]()
+    {
+        try
+        {
+            recognitionEnd.set_value();
+        }
+        catch (const std::future_error&)
+        {
+            // Already signaled by the other handler; nothing left to do.
+        }
+    };
+
+    recognizer->SessionStopped += [&signalRecognitionEnd](const SessionEventArgs&)
+    {
+        cout << "Session stopped." << endl;
+        signalRecognitionEnd();
+    };
+
+    recognizer->Canceled += [&signalRecognitionEnd](const SpeechRecognitionCanceledEventArgs& e)
+    {
+        switch (e.Reason)
+        {
+        case CancellationReason::EndOfStream:
+            // End of file is not signaled here; the wait below is released by SessionStopped.
+            cout << "CANCELED: Reach the end of the file." << std::endl;
+            break;
+
+        case CancellationReason::Error:
+            cout << "CANCELED: ErrorCode=" << static_cast<int>(e.ErrorCode) << std::endl;
+            cout << "CANCELED: ErrorDetails=" << e.ErrorDetails << std::endl;
+            signalRecognitionEnd();
+            break;
+
+        default:
+            cout << "CANCELED: received unknown reason." << std::endl;
+        }
+    };
+
+    recognizer->Recognized += [&recognizedTexts, &contentResult](const SpeechRecognitionEventArgs& e)
+    {
+        // Keep only meaningful text: skip empty results and a lone ".".
+        const auto& text = e.Result->Text;
+        if (!text.empty() && text != ".")
+        {
+            recognizedTexts.push_back(text);
+        }
+
+        // Each recognized event overwrites the previous value, so the content
+        // assessment reported at the end is the one from the last event.
+        auto pronResult = PronunciationAssessmentResult::FromResult(e.Result);
+        contentResult = pronResult->ContentAssessmentResult;
+    };
+
+    recognizer->StartContinuousRecognitionAsync().wait();
+
+    recognitionEnd.get_future().get(); // Waits for recognition end.
+
+    recognizer->StopContinuousRecognitionAsync().get();
+
+    // Print everything that was recognized; only non-empty texts were collected above.
+    cout << "Content assessment for: " << endl;
+    for (const string& recognizedText : recognizedTexts) {
+        cout << recognizedText << " ";
+    }
+    cout << endl;
+
+    // The content assessment result, if any, was captured by the Recognized handler.
+    if (contentResult != nullptr) {
+        cout << "Assessment Result: " << "GrammarScore: "  << contentResult->GrammarScore << ", VocabularyScore : " << contentResult->VocabularyScore << ", TopicScore : " << contentResult->TopicScore << endl;
+    }
+    else {
+        cout << "The contentResult is empty!" << endl;
+    }
+}
+
 #pragma region Language Detection related samples
 
 void SpeechRecognitionAndLanguageIdWithMicrophone()

diff --git a/...les/java/jre/console/src/com/microsoft/cognitiveservices/speech/samples/console/Main.java b/...les/java/jre/console/src/com/microsoft/cognitiveservices/speech/samples/console/Main.java
@@ -62,6 +62,7 @@ public static void main(String[] args) {
         System.out.println("45. Continuous speech recognition from file, with at-start language detection with custom model.");
         System.out.println("46. Continuous speech recognition from file, with continuous language detection with custom models.");
         System.out.println("47. Pronunciation assessment configured with JSON.");
+        System.out.println("48. Pronunciation assessment with content assessment.");
 
         System.out.print(prompt);
 
@@ -212,6 +213,9 @@ public static void main(String[] args) {
                 case "47":
                     SpeechRecognitionSamples.pronunciationAssessmentConfiguredWithJson();
                     break;
+                case "48":
+                    SpeechRecognitionSamples.pronunciationAssessmentWithContentAssessment();
+                    break;
                 case "0":
                     System.out.println("Exiting...");
                     break;