Skip to content

Commit

Permalink
adding PA content score functionality for cpp/swift/objc/java language (
Browse files Browse the repository at this point in the history
  • Loading branch information
jinshan1979 authored Nov 24, 2023
1 parent b6f037e commit 6e09ee4
Show file tree
Hide file tree
Showing 13 changed files with 505 additions and 29 deletions.
6 changes: 6 additions & 0 deletions samples/cpp/windows/console/samples/main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ extern void KeywordTriggeredSpeechRecognitionWithMicrophone();
extern void PronunciationAssessmentWithMicrophone();
extern void PronunciationAssessmentWithStream();
extern void PronunciationAssessmentConfiguredWithJson();
extern void PronunciationAssessmentWithContentAssessment();
extern void SpeechContinuousRecognitionFromDefaultMicrophoneWithMASEnabled();
extern void SpeechRecognitionFromMicrophoneWithMASEnabledAndPresetGeometrySpecified();
extern void SpeechContinuousRecognitionFromMultiChannelFileWithMASEnabledAndCustomGeometrySpecified();
Expand Down Expand Up @@ -109,6 +110,7 @@ void SpeechSamples()
" beam-forming angles specified.\n";
cout << "e.) Pronunciation assessment with stream.\n";
cout << "f.) Pronunciation assessment configured with json.\n";
cout << "g.) Pronunciation assessment with content assessment.\n";
cout << "\nChoice (0 for MAIN MENU): ";
cout.flush();

Expand Down Expand Up @@ -168,6 +170,10 @@ void SpeechSamples()
case 'f':
PronunciationAssessmentConfiguredWithJson();
break;
case 'G':
case 'g':
PronunciationAssessmentWithContentAssessment();
break;
case '0':
break;
}
Expand Down
Binary file not shown.
10 changes: 8 additions & 2 deletions samples/cpp/windows/console/samples/samples.vcxproj
Original file line number Diff line number Diff line change
Expand Up @@ -190,7 +190,7 @@
<None Include="katiesteve_mono.wav">
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
<DeploymentContent>true</DeploymentContent>
</None>
</None>
<None Include="enrollment_audio_katie.wav">
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
<DeploymentContent>true</DeploymentContent>
Expand All @@ -204,6 +204,12 @@
<DeploymentContent>true</DeploymentContent>
</None>
</ItemGroup>
<ItemGroup>
<CopyFileToFolders Include="pronunciation_assessment_fall.wav">
<DeploymentContent>true</DeploymentContent>
<FileType>Document</FileType>
</CopyFileToFolders>
</ItemGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
<Target Name="EnsureNuGetPackageBuildImports" BeforeTargets="PrepareForBuild">
<PropertyGroup>
Expand All @@ -213,4 +219,4 @@
<Error Condition="!Exists('..\packages\Microsoft.CognitiveServices.Speech.Extension.MAS.1.33.0\build\native\Microsoft.CognitiveServices.Speech.Extension.MAS.targets')" Text="$([System.String]::Format('$(ErrorText)', '..\packages\Microsoft.CognitiveServices.Speech.Extension.MAS.1.33.0\build\native\Microsoft.CognitiveServices.Speech.Extension.MAS.targets'))" />
<Error Condition="!Exists('..\packages\nlohmann.json.3.10.4\build\native\nlohmann.json.targets')" Text="$([System.String]::Format('$(ErrorText)', '..\packages\nlohmann.json.3.10.4\build\native\nlohmann.json.targets'))" />
</Target>
</Project>
</Project>
11 changes: 11 additions & 0 deletions samples/cpp/windows/console/samples/samples.vcxproj.filters
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,9 @@
<ClCompile Include="diagnostics_logging_samples.cpp">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include="meeting_transcriber_samples.cpp">
<Filter>Source Files</Filter>
</ClCompile>
</ItemGroup>
<ItemGroup>
<None Include="whatstheweatherlike.wav">
Expand All @@ -74,5 +77,13 @@
<Filter>Resource Files</Filter>
</None>
<None Include="packages.config" />
<None Include="katiesteve_mono.wav">
<Filter>Resource Files</Filter>
</None>
</ItemGroup>
<ItemGroup>
<CopyFileToFolders Include="pronunciation_assessment_fall.wav">
<Filter>Resource Files</Filter>
</CopyFileToFolders>
</ItemGroup>
</Project>
107 changes: 100 additions & 7 deletions samples/cpp/windows/console/samples/speech_recognition_samples.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -558,11 +558,13 @@ void PronunciationAssessmentWithMicrophone()
config->SetProperty(PropertyId::SpeechServiceConnection_EndSilenceTimeoutMs, "3000");

std::string referenceText = "";
// create pronunciation assessment config, set grading system, granularity and if enable miscue based on your requirement.
// Create pronunciation assessment config, set grading system, granularity and if enable miscue based on your requirement.
auto pronunciationConfig = PronunciationAssessmentConfig::Create(referenceText,
PronunciationAssessmentGradingSystem::HundredMark,
PronunciationAssessmentGranularity::Phoneme, true);

pronunciationConfig->EnableProsodyAssessment();

// Creates a speech recognizer using microphone as audio input.
auto recognizer = SpeechRecognizer::FromConfig(config, "en-US");

Expand Down Expand Up @@ -594,8 +596,8 @@ void PronunciationAssessmentWithMicrophone()

auto pronunciationResult = PronunciationAssessmentResult::FromResult(result);

cout << " Accuracy score: " << pronunciationResult->AccuracyScore << ", Pronunciation score: "
<< pronunciationResult->PronunciationScore << ", Completeness score : " << pronunciationResult->CompletenessScore
cout << " Accuracy score: " << pronunciationResult->AccuracyScore << ", Prosody Score: " << pronunciationResult->ProsodyScore << ", Pronunciation score: "
<< pronunciationResult->PronunciationScore << ", Completeness score: " << pronunciationResult->CompletenessScore
<< ", FluencyScore: " << pronunciationResult->FluencyScore << endl;
}
else if (result->Reason == ResultReason::NoMatch)
Expand Down Expand Up @@ -625,8 +627,11 @@ void PronunciationAssessmentWithStreamInternalAsync(shared_ptr<SpeechConfig> spe
// Specify the language used for Pronunciation Assessment
auto speechRecognizer = SpeechRecognizer::FromConfig(speechConfig, "en-US", audioConfig);

// create pronunciation assessment config, set grading system, granularity and if enable miscue based on your requirement.
// Create pronunciation assessment config, set grading system, granularity and if enable miscue based on your requirement.
auto pronAssessmentConfig = PronunciationAssessmentConfig::Create(referenceText, PronunciationAssessmentGradingSystem::HundredMark, PronunciationAssessmentGranularity::Phoneme, false);

pronAssessmentConfig->EnableProsodyAssessment();

pronAssessmentConfig->ApplyTo(speechRecognizer);

audioInputStream->Write(audioData.data(), static_cast<uint32_t>(audioData.size()));
Expand All @@ -642,7 +647,7 @@ void PronunciationAssessmentWithStreamInternalAsync(shared_ptr<SpeechConfig> spe
{
auto responsePA = PronunciationAssessmentResult::FromResult(result);
std::string responseResult = "PRONUNCIATION ASSESSMENT RESULTS : \n";
responseResult = responseResult + " Accuracy score: " + std::to_string(responsePA->AccuracyScore) + ", Pronunciation score: " + std::to_string(responsePA->PronunciationScore) + ", Completeness score : " + std::to_string(responsePA->CompletenessScore) + ", FluencyScore: " + std::to_string(responsePA->FluencyScore);
responseResult = responseResult + " Accuracy score: " + std::to_string(responsePA->AccuracyScore) + " Prosody score: " + std::to_string(responsePA->ProsodyScore) + ", Pronunciation score: " + std::to_string(responsePA->PronunciationScore) + ", Completeness score : " + std::to_string(responsePA->CompletenessScore) + ", FluencyScore: " + std::to_string(responsePA->FluencyScore);

resultContainer.push_back(responseResult);
}
Expand Down Expand Up @@ -697,11 +702,13 @@ void PronunciationAssessmentConfiguredWithJson()

std::string referenceText = "what's the weather like";

// create pronunciation assessment config, set grading system, granularity and if enable miscue based on your requirement.
// Create pronunciation assessment config, set grading system, granularity and if enable miscue based on your requirement.
std::string json_config = "{\"GradingSystem\":\"HundredMark\",\"Granularity\":\"Phoneme\",\"EnableMiscue\":true, \"ScenarioId\":\"[scenario ID will be assigned by product team]\"}";
auto pronunciationConfig = PronunciationAssessmentConfig::CreateFromJson(json_config);
pronunciationConfig->SetReferenceText(referenceText);

pronunciationConfig->EnableProsodyAssessment();

// Creates a speech recognizer.
auto recognizer = SpeechRecognizer::FromConfig(config, "en-US", audioConfig);

Expand All @@ -719,7 +726,7 @@ void PronunciationAssessmentConfiguredWithJson()

auto pronunciationResult = PronunciationAssessmentResult::FromResult(result);

cout << " Accuracy score: " << pronunciationResult->AccuracyScore << ", Pronunciation score: "
cout << " Accuracy score: " << pronunciationResult->AccuracyScore << ", Prosody Score: " << pronunciationResult->ProsodyScore << ", Pronunciation score: "
<< pronunciationResult->PronunciationScore << ", Completeness score : " << pronunciationResult->CompletenessScore
<< ", FluencyScore: " << pronunciationResult->FluencyScore << endl;
}
Expand All @@ -741,6 +748,92 @@ void PronunciationAssessmentConfiguredWithJson()
}
}

// Pronunciation assessment with content assessment
void PronunciationAssessmentWithContentAssessment()
{
// Creates an instance of a speech config with specified subscription key and service region.
// Replace with your own subscription key and service region (e.g., "westus").
auto config = SpeechConfig::FromSubscription("YourSubscriptionKey", "YourServiceRegion");

// Creates a speech recognizer from an audio file
auto audioConfig = AudioConfig::FromWavFileInput("pronunciation_assessment_fall.wav");

std::string theTopic = "the season of the fall";

// Create pronunciation assessment config, set grading system, granularity and if enable miscue based on your requirement.
auto pronunciationConfig = PronunciationAssessmentConfig::Create("", PronunciationAssessmentGradingSystem::HundredMark, PronunciationAssessmentGranularity::Phoneme, false);

pronunciationConfig->EnableProsodyAssessment();
pronunciationConfig->EnableContentAssessmentWithTopic(theTopic);

// Creates a speech recognizer.
auto recognizer = SpeechRecognizer::FromConfig(config, "en-US", audioConfig);

pronunciationConfig->ApplyTo(recognizer);

vector<string> recognizedTexts;
std::shared_ptr<PronunciationContentAssessmentResult> contentResult;
promise<void> recognitionEnd;

recognizer->SessionStopped += [&recognitionEnd](const SessionEventArgs& e)
{
cout << "Session stopped." << endl;
recognitionEnd.set_value();
};

recognizer->Canceled += [&recognitionEnd](const SpeechRecognitionCanceledEventArgs& e)
{
switch (e.Reason)
{
case CancellationReason::EndOfStream:
cout << "CANCELED: Reach the end of the file." << std::endl;
break;

case CancellationReason::Error:
cout << "CANCELED: ErrorCode=" << (int)e.ErrorCode << std::endl;
cout << "CANCELED: ErrorDetails=" << e.ErrorDetails << std::endl;
recognitionEnd.set_value();
break;

default:
cout << "CANCELED: received unknown reason." << std::endl;
}
};

recognizer->Recognized += [&recognizedTexts, &contentResult](const SpeechRecognitionEventArgs& e)
{
string text = e.Result->Text;
if (!text.empty() && text != ".")
recognizedTexts.push_back(text);

auto pronResult = PronunciationAssessmentResult::FromResult(e.Result);

contentResult = pronResult->ContentAssessmentResult;
};

recognizer->StartContinuousRecognitionAsync().wait();

recognitionEnd.get_future().get(); // Waits for recognition end.

recognizer->StopContinuousRecognitionAsync().get();

// Content assessment result is in the contentJsons
cout << "Content assessment for: " << endl;
for (const string& recognizedText : recognizedTexts) {
if (!recognizedText.empty()) {
cout << recognizedText << " ";
}
}
cout << endl;

if (contentResult != nullptr) {
cout << "Assessment Result: " << "GrammarScore: " << contentResult->GrammarScore << ", VocabularyScore : " << contentResult->VocabularyScore << ", TopicScore : " << contentResult->TopicScore << endl;
}
else {
cout << "The contentResult is empty!" << endl;
}
}

#pragma region Language Detection related samples

void SpeechRecognitionAndLanguageIdWithMicrophone()
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,7 @@ public static void main(String[] args) {
System.out.println("45. Continuous speech recognition from file, with at-start language detection with custom model.");
System.out.println("46. Continuous speech recognition from file, with continuous language detection with custom models.");
System.out.println("47. Pronunciation assessment configured with JSON.");
System.out.println("48. Pronunciation assessment with content assessment.");

System.out.print(prompt);

Expand Down Expand Up @@ -212,6 +213,9 @@ public static void main(String[] args) {
case "47":
SpeechRecognitionSamples.pronunciationAssessmentConfiguredWithJson();
break;
case "48":
SpeechRecognitionSamples.pronunciationAssessmentWithContentAssessment();
break;
case "0":
System.out.println("Exiting...");
break;
Expand Down
Loading

0 comments on commit 6e09ee4

Please sign in to comment.