Skip to content

Commit

Permalink
Merge branch 'master' into jvandervegte/RevertYetAnotherRevert
Browse files Browse the repository at this point in the history
  • Loading branch information
HenryvanderVegte authored Aug 21, 2024
2 parents b9fbb45 + c8f24a7 commit 7660a7f
Show file tree
Hide file tree
Showing 88 changed files with 6,197 additions and 295 deletions.
1 change: 1 addition & 0 deletions .gitattributes
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,7 @@ proguard-rules.pro text
*.bacpac binary
*.class binary
*.bmp binary
*.ico binary
*.jar binary
*.jpg binary
*.mp3 binary
Expand Down
9 changes: 8 additions & 1 deletion samples/batch-avatar/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,14 +3,21 @@
The Batch avatar synthesis API (Preview) provides asynchronous synthesis of talking avatar to generate avatar video content with the text input.
The functionality is exposed through a REST API and is easy to access from many programming languages. The samples here do **NOT** require the installation of the Cognitive Services Speech SDK, but use the REST API directly instead.

For a detailed explanation see the [batch synthesis documentation](https://docs.microsoft.com/azure/cognitive-services/speech-service/batch-synthesis) and the `README.md` in the language specific subdirectories.
For a detailed explanation see the [batch avatar synthesis documentation](https://learn.microsoft.com/azure/ai-services/speech-service/text-to-speech-avatar/batch-synthesis-avatar) and the `README.md` in the language specific subdirectories.

Available samples:

| Language | Directory | Description |
| ---------- | -------- | ----------- |
| Python | [python](python) | Python client calling batch avatar synthesis REST API |
| C# | [csharp](csharp) | C# client calling batch avatar synthesis REST API |

## Note

Refer to [the real-time avatar sample](../js/browser/avatar/README.md) for real-time avatar synthesis.


## Resources

1. [Batch avatar synthesis request properties](https://learn.microsoft.com/azure/ai-services/speech-service/text-to-speech-avatar/batch-synthesis-avatar-properties)
2. [The OpenAPI specification for the Batch avatar synthesis API](https://github.com/Azure/azure-rest-api-specs/blob/main/specification/cognitiveservices/data-plane/Speech/BatchAvatar/stable/2024-08-01/batchavatar.json)
5 changes: 5 additions & 0 deletions samples/batch-avatar/csharp/BatchAvatarSample/.editorconfig
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
[*]
charset = utf-8

[*.cs]
file_header_template = \nCopyright (c) Microsoft. All rights reserved.\nLicensed under the MIT license. See LICENSE.md file in the project root for full license information.\n
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
Microsoft Visual Studio Solution File, Format Version 12.00
# Visual Studio Version 17
VisualStudioVersion = 17.9.34728.123
MinimumVisualStudioVersion = 10.0.40219.1
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "BatchAvatarSample", "BatchAvatarSample\BatchAvatarSample.csproj", "{CF5ED996-4313-480A-8A3D-0BB8C3D0B73A}"
EndProject
Global
GlobalSection(SolutionConfigurationPlatforms) = preSolution
Debug|Any CPU = Debug|Any CPU
Release|Any CPU = Release|Any CPU
EndGlobalSection
GlobalSection(ProjectConfigurationPlatforms) = postSolution
{CF5ED996-4313-480A-8A3D-0BB8C3D0B73A}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
{CF5ED996-4313-480A-8A3D-0BB8C3D0B73A}.Debug|Any CPU.Build.0 = Debug|Any CPU
{CF5ED996-4313-480A-8A3D-0BB8C3D0B73A}.Release|Any CPU.ActiveCfg = Release|Any CPU
{CF5ED996-4313-480A-8A3D-0BB8C3D0B73A}.Release|Any CPU.Build.0 = Release|Any CPU
EndGlobalSection
GlobalSection(SolutionProperties) = preSolution
HideSolutionNode = FALSE
EndGlobalSection
GlobalSection(ExtensibilityGlobals) = postSolution
SolutionGuid = {B0A9639E-340F-46FF-B5AE-92569CFF2A72}
EndGlobalSection
EndGlobal
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
<Project Sdk="Microsoft.NET.Sdk">

<PropertyGroup>
<OutputType>Exe</OutputType>
<TargetFramework>net8.0</TargetFramework>
<ImplicitUsings>enable</ImplicitUsings>
<Nullable>enable</Nullable>
</PropertyGroup>

</Project>
Original file line number Diff line number Diff line change
@@ -0,0 +1,185 @@
//
// Copyright (c) Microsoft. All rights reserved.
// Licensed under the MIT license. See LICENSE.md file in the project root for full license information.
//

using BatchAvatarSample.dto;
using System.Net.Http.Json;
using System.Text.Json;
using System.Text.Json.Serialization;

/// <summary>
/// Sample for the batch avatar synthesis (Preview) REST API: submits a synthesis
/// job, polls it until it finishes, prints the output download URLs, deletes the
/// job, and finally lists the jobs of the Speech resource.
/// </summary>
internal class Program
{
    // REST API version used in every request URL.
    private const string ApiVersion = "2024-08-01";

    // The endpoint (and key) can be found on the "Keys and Endpoint" page of the Speech resource.
    // The endpoint looks like: https://<region>.api.cognitive.microsoft.com or https://<custom_domain>.cognitiveservices.azure.com
    private static readonly string subscriptionKey = "SPEECH_KEY";
    private static readonly string speechEndpoint = "SPEECH_ENDPOINT";

    // Web defaults (camelCase names) plus skipping null properties, so optional
    // request fields that were not set are simply omitted from the JSON payload.
    private static readonly JsonSerializerOptions defaultJsonSerializerOptions = new(JsonSerializerDefaults.Web)
    {
        DefaultIgnoreCondition = JsonIgnoreCondition.WhenWritingNull,
    };

    private static async Task Main(string[] args)
    {
        using var httpClient = new HttpClient();
        httpClient.DefaultRequestHeaders.Add("Ocp-Apim-Subscription-Key", subscriptionKey);

        // Create a job ID.
        // Job ID must be unique within a speech resource.
        var jobId = $"batchavatar-{Guid.NewGuid()}";

        //
        // Submit a batch avatar job
        //
        var jobUri = $"{speechEndpoint}/avatar/batchsyntheses/{jobId}?api-version={ApiVersion}";

        try
        {
            var job = await CreateBatchAvatarJob(httpClient, jobUri);
            Console.WriteLine($"Batch avatar synthesis job {jobId} created.");

            //
            // Get job status
            //
            job = await GetJobAsync(httpClient, jobUri);
            Console.WriteLine($"Batch avatar synthesis job {job.Id} is in {job.Status} status.");

            //
            // Poll until job completes
            //
            while (job.Status is not ("Succeeded" or "Failed"))
            {
                Console.WriteLine($"Batch avatar synthesis job {job.Id} is still running.");
                await Task.Delay(5000);
                job = await GetJobAsync(httpClient, jobUri);
            }

            if (job.Status is "Failed")
            {
                Console.WriteLine($"Batch avatar synthesis job {job.Id} failed.");
                return;
            }

            Console.WriteLine($"Batch avatar synthesis job {job.Id} completed successfully.");

            //
            // Get outputs
            //
            Console.WriteLine("Video file can be downloaded from:");
            Console.WriteLine(job.Outputs!.Result);
            Console.WriteLine("Summary file can be downloaded from:");
            Console.WriteLine(job.Outputs!.Summary);

            //
            // Delete a job
            //
            await DeleteJobAsync(httpClient, jobUri);
            Console.WriteLine($"Batch avatar synthesis job {job.Id} deleted.");

            //
            // List jobs
            //
            var maxpagesize = 10;
            var listUri = $"{speechEndpoint}/avatar/batchsyntheses?maxpagesize={maxpagesize}&api-version={ApiVersion}";
            var allJobs = await ListJobsAsync(httpClient, listUri);
            Console.WriteLine($"Listed {allJobs.Count} jobs.");
        }
        catch (HttpRequestException exception)
        {
            Console.Error.WriteLine(exception.Message);
            return;
        }
    }


    /// <summary>
    /// Submits a new batch avatar synthesis job via HTTP PUT and returns the created job.
    /// </summary>
    /// <param name="httpClient">Client pre-configured with the subscription key header.</param>
    /// <param name="jobUri">Full job URI including the job ID and api-version.</param>
    /// <exception cref="HttpRequestException">Thrown when the service rejects the request.</exception>
    private static async Task<BatchAvatarJob> CreateBatchAvatarJob(HttpClient httpClient, string jobUri)
    {
        // To use SSML as input, please refer to RequestExamples.SsmlRequest
        // To use your custom neural voice, please refer to RequestExamples.CustomVoiceRequest
        var requestBody = new BatchAvatarRequest
        {
            InputKind = "PlainText",
            Inputs =
            [
                new BatchAvatarInput
                {
                    Content = "Hi, I'm a virtual assistant created by Microsoft.",
                },
            ],
            SynthesisConfig = new()
            {
                Voice = "en-US-AvaMultilingualNeural",
            },
            AvatarConfig = new()
            {
                TalkingAvatarCharacter = "lisa", // Avatar character
                TalkingAvatarStyle = "graceful-sitting", // Avatar style, required for prebuilt avatar, optional for custom avatar
                VideoFormat = "mp4", // mp4 or webm, webm is required for transparent background
                VideoCodec = "h264", // hevc, h264 or vp9, vp9 is required for transparent background; default is hevc
                SubtitleType = "soft_embedded",
                BackgroundColor = "#FFFFFFFF", // background color in RGBA format, default is white; can be set to 'transparent' for transparent background
                Customized = false, // Set to true if you want to use custom avatar
            },
        };

        var response = await httpClient.PutAsJsonAsync(jobUri, requestBody, defaultJsonSerializerOptions);
        await PrintResponseOnError(response);
        response.EnsureSuccessStatusCode();
        var createdJob = await response.Content.ReadFromJsonAsync<BatchAvatarJob>(defaultJsonSerializerOptions);
        return createdJob!;
    }

    /// <summary>
    /// Fetches the current state of a batch avatar synthesis job.
    /// </summary>
    private static async Task<BatchAvatarJob> GetJobAsync(HttpClient httpClient, string jobUri)
    {
        var response = await httpClient.GetAsync(jobUri);
        await PrintResponseOnError(response);
        response.EnsureSuccessStatusCode();
        var job = await response.Content.ReadFromJsonAsync<BatchAvatarJob>(defaultJsonSerializerOptions);
        return job!;
    }

    /// <summary>
    /// Deletes a batch avatar synthesis job and its outputs.
    /// </summary>
    private static async Task DeleteJobAsync(HttpClient httpClient, string jobUri)
    {
        var response = await httpClient.DeleteAsync(jobUri);
        await PrintResponseOnError(response);
        response.EnsureSuccessStatusCode();
    }

    /// <summary>
    /// Lists all batch avatar synthesis jobs, following the service's
    /// <c>nextLink</c> pagination until every page has been retrieved.
    /// </summary>
    private static async Task<IList<BatchAvatarJob>> ListJobsAsync(HttpClient httpClient, string listUri)
    {
        var allJobs = new List<BatchAvatarJob>();
        var response = await httpClient.GetAsync(listUri);
        await PrintResponseOnError(response);
        response.EnsureSuccessStatusCode();

        var pagedJobs = await response.Content.ReadFromJsonAsync<PaginatedResults<BatchAvatarJob>>(defaultJsonSerializerOptions);
        allJobs.AddRange(pagedJobs!.Value);
        while (pagedJobs!.NextLink is not null)
        {
            response = await httpClient.GetAsync(pagedJobs.NextLink);
            await PrintResponseOnError(response);
            response.EnsureSuccessStatusCode();

            pagedJobs = await response.Content.ReadFromJsonAsync<PaginatedResults<BatchAvatarJob>>(defaultJsonSerializerOptions);
            allJobs.AddRange(pagedJobs!.Value);
        }

        return allJobs;
    }

    /// <summary>
    /// Writes the response body and the service request ID to stderr when the
    /// response indicates failure, to aid troubleshooting; no-op on success.
    /// </summary>
    private static async Task PrintResponseOnError(HttpResponseMessage response)
    {
        if (response.IsSuccessStatusCode)
        {
            return;
        }

        var responseBody = await response.Content.ReadAsStringAsync();
        // Use TryGetValues: HttpHeaders.GetValues throws InvalidOperationException
        // when the header is absent (e.g. the request never reached the API
        // gateway), which would hide the real error from the user.
        var requestId = response.Headers.TryGetValues("apim-request-id", out var requestIds)
            ? requestIds.FirstOrDefault()
            : null;
        Console.Error.WriteLine(responseBody);
        Console.Error.WriteLine($"Request ID: {requestId}");
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
//
// Copyright (c) Microsoft. All rights reserved.
// Licensed under the MIT license. See LICENSE.md file in the project root for full license information.
//

namespace BatchAvatarSample;

using BatchAvatarSample.dto;
using System;
using System.Collections.Generic;

/// <summary>
/// Ready-made request payloads demonstrating SSML input and custom neural voice
/// usage with the batch avatar synthesis API.
/// NOTE: accessing any member of this class runs all field initializers; the
/// <see cref="CustomVoiceRequest"/> initializer calls <c>Guid.Parse</c> on a
/// placeholder string and therefore throws <see cref="FormatException"/> until
/// you replace "YOUR_CUSTOM_VOICE_DEPLOYMENT_ID" with a real deployment ID.
/// </summary>
internal static class RequestExamples
{
    // readonly: these are shared example templates; callers should copy, not reassign (CA2211).
    public static readonly BatchAvatarRequest SsmlRequest = new()
    {
        InputKind = "Ssml",
        Inputs =
        [
            new BatchAvatarInput
            {
                Content = """
                <speak version="1.0" xmlns="http://www.w3.org/2001/10/synthesis" xmlns:mstts="http://www.w3.org/2001/mstts" xmlns:emo="http://www.w3.org/2009/10/emotionml" xml:lang="en-US"><voice name="en-US-AvaMultilingualNeural">hello, this is my talking avatar</voice></speak>
                """,
            },
        ],
        AvatarConfig = new()
        {
            TalkingAvatarCharacter = "lisa",
            TalkingAvatarStyle = "graceful-sitting",
            VideoFormat = "mp4",
            VideoCodec = "h264",
            SubtitleType = "soft_embedded",
            BackgroundColor = "#FFFFFFFF",
        },
    };

    public static readonly BatchAvatarRequest CustomVoiceRequest = new()
    {
        InputKind = "PlainText",
        Inputs =
        [
            new BatchAvatarInput
            {
                Content = "Hi, I'm a virtual assistant created by Microsoft.",
            },
        ],
        // Replace with your custom voice name and deployment ID if you want to use custom voice.
        // Multiple voices are supported, the mixture of custom voices and platform voices is allowed.
        // Invalid voice name or deployment ID will be rejected.
        CustomVoices = new Dictionary<string, Guid>
        {
            ["YOUR_CUSTOM_VOICE_NAME"] = Guid.Parse("YOUR_CUSTOM_VOICE_DEPLOYMENT_ID"),
        },
        SynthesisConfig = new()
        {
            Voice = "YOUR_CUSTOM_VOICE_NAME",
        },
        AvatarConfig = new()
        {
            TalkingAvatarCharacter = "lisa",
            TalkingAvatarStyle = "graceful-sitting",
            VideoFormat = "mp4",
            VideoCodec = "h264",
            SubtitleType = "soft_embedded",
            BackgroundColor = "#FFFFFFFF",
        },
    };
}
Loading

0 comments on commit 7660a7f

Please sign in to comment.