Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Feature Dubbing #50

Merged
merged 16 commits into from
Sep 2, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -339,3 +339,4 @@ ASALocalRun/
# BeatPulse healthcheck temp database
healthchecksdb
.vscode
*.dubbed.*
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
// Licensed under the MIT License. See LICENSE in the project root for license information.

using System.Threading.Tasks;
using Microsoft.AspNetCore.Http;
using System.Threading.Tasks;

namespace ElevenLabs.Proxy
{
Expand Down
108 changes: 108 additions & 0 deletions ElevenLabs-DotNet-Tests/TestFixture_08_DubbingEndpoint.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,108 @@
// Licensed under the MIT License. See LICENSE in the project root for license information.

using ElevenLabs.Dubbing;
using NUnit.Framework;
using System;
using System.IO;
using System.Threading.Tasks;

namespace ElevenLabs.Tests
{
/// <summary>
/// Integration tests for the dubbing endpoint: dub a source, download the dubbed media and its
/// transcript, then delete the dubbing project.
/// </summary>
internal class TestFixture_08_DubbingEndpoint : AbstractTestFixture
{
    /// <summary>
    /// Dubs a local audio file and verifies that the dubbed file and the transcript are written next to it.
    /// </summary>
    [Test]
    public async Task Test_01_Dubbing_File()
    {
        Assert.NotNull(ElevenLabsClient.DubbingEndpoint);
        var filePath = Path.GetFullPath("../../../Assets/test_sample_01.ogg");
        var request = new DubbingRequest(filePath, "es", "en", 1);
        var response = await ElevenLabsClient.DubbingEndpoint.DubAsync(request, progress: CreateProgressReporter());
        Assert.IsFalse(string.IsNullOrEmpty(response.DubbingId));

        try
        {
            Assert.IsTrue(response.ExpectedDurationSeconds > 0);

            var srcFile = new FileInfo(filePath);
            var dubbedPath = new FileInfo($"{srcFile.FullName}.dubbed.{request.TargetLanguage}{srcFile.Extension}");
            var transcriptPath = new FileInfo($"{srcFile.FullName}.dubbed.{request.TargetLanguage}.srt");
            await DownloadAndVerifyAsync(response.DubbingId, request.TargetLanguage, dubbedPath, transcriptPath);
        }
        finally
        {
            // Always remove the remote project, even when an assertion above fails.
            await ElevenLabsClient.DubbingEndpoint.DeleteDubbingProjectAsync(response.DubbingId);
        }
    }

    /// <summary>
    /// Dubs media referenced by URL and verifies that the dubbed file and the transcript are written to the Assets directory.
    /// </summary>
    [Test]
    public async Task Test_02_Dubbing_Url()
    {
        Assert.NotNull(ElevenLabsClient.DubbingEndpoint);

        var uri = new Uri("https://youtu.be/Zo5-rhYOlNk");
        var request = new DubbingRequest(uri, "ja", "en", 1, true);
        var response = await ElevenLabsClient.DubbingEndpoint.DubAsync(request, progress: CreateProgressReporter());
        Assert.IsFalse(string.IsNullOrEmpty(response.DubbingId));

        try
        {
            Assert.IsTrue(response.ExpectedDurationSeconds > 0);

            var assetsDir = Path.GetFullPath("../../../Assets");
            var dubbedPath = new FileInfo(Path.Combine(assetsDir, $"online.dubbed.{request.TargetLanguage}.mp4"));
            var transcriptPath = new FileInfo(Path.Combine(assetsDir, $"online.dubbed.{request.TargetLanguage}.srt"));
            await DownloadAndVerifyAsync(response.DubbingId, request.TargetLanguage, dubbedPath, transcriptPath);
        }
        finally
        {
            // Always remove the remote project, even when an assertion above fails.
            await ElevenLabsClient.DubbingEndpoint.DeleteDubbingProjectAsync(response.DubbingId);
        }
    }

    /// <summary>
    /// Creates the progress callback shared by both tests; it writes the current dubbing status to the console.
    /// </summary>
    private static Progress<DubbingProjectMetadata> CreateProgressReporter()
        => new(metadata =>
        {
            switch (metadata.Status)
            {
                case "dubbing":
                    Console.WriteLine($"Dubbing for {metadata.DubbingId} in progress... Expected Duration: {metadata.ExpectedDurationSeconds:0.00} seconds");
                    break;
                case "dubbed":
                    Console.WriteLine($"Dubbing for {metadata.DubbingId} complete in {metadata.TimeCompleted.TotalSeconds:0.00} seconds!");
                    break;
                default:
                    Console.WriteLine($"Status: {metadata.Status}");
                    break;
            }
        });

    /// <summary>
    /// Streams the dubbed media into <paramref name="dubbedFile"/>, writes the transcript into
    /// <paramref name="transcriptFile"/>, and asserts that both files exist and are not empty.
    /// </summary>
    private async Task DownloadAndVerifyAsync(string dubbingId, string languageCode, FileInfo dubbedFile, FileInfo transcriptFile)
    {
        {
            // Scoped so the stream is flushed and closed before the file is inspected.
            await using var fs = File.Open(dubbedFile.FullName, FileMode.Create);

            await foreach (var chunk in ElevenLabsClient.DubbingEndpoint.GetDubbedFileAsync(dubbingId, languageCode))
            {
                await fs.WriteAsync(chunk);
            }
        }

        // FileInfo caches file system state; refresh so the checks reflect the file just written.
        dubbedFile.Refresh();
        Assert.IsTrue(dubbedFile.Exists);
        Assert.IsTrue(dubbedFile.Length > 0);

        var transcript = await ElevenLabsClient.DubbingEndpoint.GetTranscriptForDubAsync(dubbingId, languageCode);
        await File.WriteAllTextAsync(transcriptFile.FullName, transcript);

        transcriptFile.Refresh();
        Assert.IsTrue(transcriptFile.Exists);
        Assert.IsTrue(transcriptFile.Length > 0);
    }
}
}
222 changes: 222 additions & 0 deletions ElevenLabs-DotNet/Dubbing/DubbingEndpoint.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,222 @@
// Licensed under the MIT License. See LICENSE in the project root for license information.

using ElevenLabs.Extensions;
using System;
using System.Collections.Generic;
using System.Diagnostics;
using System.Globalization;
using System.Net.Http;
using System.Runtime.CompilerServices;
using System.Text.Json;
using System.Threading;
using System.Threading.Tasks;

namespace ElevenLabs.Dubbing
{
/// <summary>
/// Access to dubbing an audio or video file into a given language.
/// </summary>
public sealed class DubbingEndpoint(ElevenLabsClient client) : ElevenLabsBaseEndPoint(client)
{
    private const string DubbingId = "dubbing_id";
    private const string ExpectedDurationSecs = "expected_duration_sec";

    protected override string Root => "dubbing";

    /// <summary>
    /// Dubs provided audio or video file into given language.
    /// </summary>
    /// <remarks>
    /// The <paramref name="request"/> is disposed as soon as the form payload has been built,
    /// whether or not building it succeeded.
    /// </remarks>
    /// <param name="request">The <see cref="DubbingRequest"/> containing dubbing configuration and files.</param>
    /// <param name="maxRetries">Optional, maximum number of status checks before a <see cref="TimeoutException"/> is thrown. Defaults to 60.</param>
    /// <param name="pollingInterval">
    /// Optional, fixed delay between status checks. When omitted, the delay after the n-th check is
    /// the expected duration reported by the API divided by 2^n.
    /// </param>
    /// <param name="progress">Optional, receives the project metadata after every status check.</param>
    /// <param name="cancellationToken">Optional, <see cref="CancellationToken"/>.</param>
    /// <returns> <see cref="DubbingProjectMetadata"/>.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="request"/> is null.</exception>
    /// <exception cref="TimeoutException">The project was still dubbing after <paramref name="maxRetries"/> status checks.</exception>
    /// <exception cref="Exception">The project reported a status other than "dubbing" or "dubbed".</exception>
    public async Task<DubbingProjectMetadata> DubAsync(DubbingRequest request, int? maxRetries = null, TimeSpan? pollingInterval = null, IProgress<DubbingProjectMetadata> progress = null, CancellationToken cancellationToken = default)
    {
        ArgumentNullException.ThrowIfNull(request);
        using var payload = new MultipartFormDataContent();

        try
        {
            foreach (var (fileName, mediaType, stream) in request.Files)
            {
                await payload.AppendFileToFormAsync("file", stream, fileName, new(mediaType), cancellationToken).ConfigureAwait(false);
            }

            if (!string.IsNullOrEmpty(request.ProjectName))
            {
                payload.Add(new StringContent(request.ProjectName), "name");
            }

            if (request.SourceUrl != null)
            {
                payload.Add(new StringContent(request.SourceUrl.ToString()), "source_url");
            }

            if (!string.IsNullOrEmpty(request.SourceLanguage))
            {
                payload.Add(new StringContent(request.SourceLanguage), "source_lang");
            }

            if (!string.IsNullOrEmpty(request.TargetLanguage))
            {
                payload.Add(new StringContent(request.TargetLanguage), "target_lang");
            }

            if (request.NumberOfSpeakers.HasValue)
            {
                payload.Add(new StringContent(request.NumberOfSpeakers.Value.ToString(CultureInfo.InvariantCulture)), "num_speakers");
            }

            if (request.Watermark.HasValue)
            {
                payload.Add(new StringContent(request.Watermark.Value.ToString()), "watermark");
            }

            if (request.StartTime.HasValue)
            {
                payload.Add(new StringContent(request.StartTime.Value.ToString(CultureInfo.InvariantCulture)), "start_time");
            }

            if (request.EndTime.HasValue)
            {
                payload.Add(new StringContent(request.EndTime.Value.ToString(CultureInfo.InvariantCulture)), "end_time");
            }

            if (request.HighestResolution.HasValue)
            {
                payload.Add(new StringContent(request.HighestResolution.Value.ToString()), "highest_resolution");
            }
        }
        finally
        {
            request.Dispose();
        }

        using var response = await client.Client.PostAsync(GetUrl(), payload, cancellationToken).ConfigureAwait(false);
        var responseBody = await response.ReadAsStringAsync(EnableDebug, cancellationToken).ConfigureAwait(false);
        var dubResponse = JsonSerializer.Deserialize<DubbingResponse>(responseBody);

        // Without an explicit interval, start from the expected duration and let the wait loop shrink it.
        var initialInterval = pollingInterval ?? TimeSpan.FromSeconds(dubResponse.ExpectedDurationSeconds);
        return await WaitForDubbingCompletionAsync(dubResponse, maxRetries ?? 60, initialInterval, pollingInterval == null, progress, cancellationToken).ConfigureAwait(false);
    }

    /// <summary>
    /// Polls the project metadata until the status is "dubbed", the status indicates a failure,
    /// or <paramref name="maxRetries"/> checks have been made.
    /// </summary>
    /// <param name="dubbingResponse">The response returned when the dubbing project was created.</param>
    /// <param name="maxRetries">Maximum number of status checks.</param>
    /// <param name="pollingInterval">Delay between checks; overwritten on each check when <paramref name="adjustInterval"/> is true.</param>
    /// <param name="adjustInterval">When true, the delay after the n-th check is the expected duration divided by 2^n.</param>
    /// <param name="progress">Optional, receives the project metadata after every status check.</param>
    /// <param name="cancellationToken">Optional, <see cref="CancellationToken"/>.</param>
    /// <returns>The <see cref="DubbingProjectMetadata"/> of the completed project.</returns>
    private async Task<DubbingProjectMetadata> WaitForDubbingCompletionAsync(DubbingResponse dubbingResponse, int maxRetries, TimeSpan pollingInterval, bool adjustInterval, IProgress<DubbingProjectMetadata> progress = null, CancellationToken cancellationToken = default)
    {
        var stopwatch = Stopwatch.StartNew();

        for (var i = 1; i < maxRetries + 1; i++)
        {
            var metadata = await GetDubbingProjectMetadataAsync(dubbingResponse, cancellationToken).ConfigureAwait(false);
            metadata.ExpectedDurationSeconds = dubbingResponse.ExpectedDurationSeconds;

            // Static string.Equals is null-safe: a missing status is treated as a failure below
            // instead of surfacing as a NullReferenceException.
            if (string.Equals(metadata.Status, "dubbed", StringComparison.Ordinal))
            {
                stopwatch.Stop();
                metadata.TimeCompleted = stopwatch.Elapsed;
                progress?.Report(metadata);
                return metadata;
            }

            progress?.Report(metadata);

            if (!string.Equals(metadata.Status, "dubbing", StringComparison.Ordinal))
            {
                throw new Exception($"Dubbing for {dubbingResponse.DubbingId} failed: {metadata.Error}");
            }

            // Adjust before logging so the message reports the delay that is actually awaited.
            // NOTE(review): the adjusted delay halves on every check with no lower bound, so later
            // checks run almost back to back — confirm this is acceptable for the API's rate limits.
            if (adjustInterval)
            {
                pollingInterval = TimeSpan.FromSeconds(dubbingResponse.ExpectedDurationSeconds / Math.Pow(2, i));
            }

            if (EnableDebug)
            {
                Console.WriteLine($"Dubbing for {dubbingResponse.DubbingId} in progress... Will check status again in {pollingInterval.TotalSeconds} seconds.");
            }

            await Task.Delay(pollingInterval, cancellationToken).ConfigureAwait(false);
        }

        throw new TimeoutException($"Dubbing for {dubbingResponse.DubbingId} timed out or exceeded expected duration.");
    }

    /// <summary>
    /// Returns metadata about a dubbing project, including whether it’s still in progress or not.
    /// </summary>
    /// <param name="dubbingId">The ID of the dubbing project.</param>
    /// <param name="cancellationToken">Optional, <see cref="CancellationToken"/>.</param>
    /// <returns><see cref="DubbingProjectMetadata"/>.</returns>
    public async Task<DubbingProjectMetadata> GetDubbingProjectMetadataAsync(string dubbingId, CancellationToken cancellationToken = default)
    {
        using var response = await client.Client.GetAsync(GetUrl($"/{dubbingId}"), cancellationToken).ConfigureAwait(false);
        var responseBody = await response.ReadAsStringAsync(EnableDebug, cancellationToken).ConfigureAwait(false);
        return JsonSerializer.Deserialize<DubbingProjectMetadata>(responseBody);
    }

    /// <summary>
    /// Returns transcript for the dub in the specified format (SRT or WebVTT).
    /// </summary>
    /// <param name="dubbingId">The ID of the dubbing project.</param>
    /// <param name="languageCode">The language code of the transcript.</param>
    /// <param name="formatType">Optional. The format type of the transcript file, either 'srt' or 'webvtt'. Defaults to <see cref="DubbingFormat.Srt"/>.</param>
    /// <param name="cancellationToken">Optional, <see cref="CancellationToken"/>.</param>
    /// <returns>
    /// A task representing the asynchronous operation. The task completes with the transcript content
    /// as a string in the specified format.
    /// </returns>
    public async Task<string> GetTranscriptForDubAsync(string dubbingId, string languageCode, DubbingFormat formatType = DubbingFormat.Srt, CancellationToken cancellationToken = default)
    {
        // Invariant casing keeps the query value independent of the current culture.
        var @params = new Dictionary<string, string> { { "format_type", formatType.ToString().ToLowerInvariant() } };
        using var response = await client.Client.GetAsync(GetUrl($"/{dubbingId}/transcript/{languageCode}", @params), cancellationToken).ConfigureAwait(false);
        return await response.ReadAsStringAsync(EnableDebug, cancellationToken).ConfigureAwait(false);
    }

    /// <summary>
    /// Returns dubbed file as a streamed file.
    /// </summary>
    /// <param name="dubbingId">The ID of the dubbing project.</param>
    /// <param name="languageCode">The language code of the dubbed content.</param>
    /// <param name="bufferSize">The size of the buffer used to read data from the response stream. Default is 8192 bytes. Must be greater than zero.</param>
    /// <param name="cancellationToken">Optional, <see cref="CancellationToken"/>.</param>
    /// <returns>
    /// An asynchronous enumerable of byte arrays representing the dubbed file content. Each byte array
    /// contains a chunk of the dubbed file data and is at most <paramref name="bufferSize"/> bytes long.
    /// </returns>
    /// <remarks>
    /// The response is read with <see cref="HttpCompletionOption.ResponseHeadersRead"/> and yielded chunk by chunk.
    /// Because this is an iterator, argument validation runs when enumeration starts.
    /// </remarks>
    /// <exception cref="ArgumentOutOfRangeException"><paramref name="bufferSize"/> is zero or negative.</exception>
    public async IAsyncEnumerable<byte[]> GetDubbedFileAsync(string dubbingId, string languageCode, int bufferSize = 8192, [EnumeratorCancellation] CancellationToken cancellationToken = default)
    {
        if (bufferSize <= 0)
        {
            // A zero-length buffer would make the first read return 0 and silently yield an empty file.
            throw new ArgumentOutOfRangeException(nameof(bufferSize), bufferSize, "Buffer size must be greater than zero.");
        }

        using var response = await client.Client.GetAsync(GetUrl($"/{dubbingId}/audio/{languageCode}"), HttpCompletionOption.ResponseHeadersRead, cancellationToken).ConfigureAwait(false);
        await response.CheckResponseAsync(EnableDebug, cancellationToken).ConfigureAwait(false);
        await using var responseStream = await response.Content.ReadAsStreamAsync(cancellationToken).ConfigureAwait(false);
        var buffer = new byte[bufferSize];
        int bytesRead;

        while ((bytesRead = await responseStream.ReadAsync(buffer, cancellationToken).ConfigureAwait(false)) > 0)
        {
            // Copy out of the reusable buffer so every yielded chunk owns its data.
            var chunk = new byte[bytesRead];
            Array.Copy(buffer, chunk, bytesRead);
            yield return chunk;
        }
    }

    /// <summary>
    /// Deletes a dubbing project.
    /// </summary>
    /// <param name="dubbingId">The ID of the dubbing project.</param>
    /// <param name="cancellationToken">Optional, <see cref="CancellationToken"/>.</param>
    public async Task DeleteDubbingProjectAsync(string dubbingId, CancellationToken cancellationToken = default)
    {
        using var response = await client.Client.DeleteAsync(GetUrl($"/{dubbingId}"), cancellationToken).ConfigureAwait(false);
        await response.CheckResponseAsync(EnableDebug, cancellationToken).ConfigureAwait(false);
    }
}
}
14 changes: 14 additions & 0 deletions ElevenLabs-DotNet/Dubbing/DubbingFormat.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
// Licensed under the MIT License. See LICENSE in the project root for license information.

using System.Runtime.Serialization;

namespace ElevenLabs.Dubbing
{
/// <summary>
/// Transcript file formats that can be requested for a dub.
/// </summary>
public enum DubbingFormat
{
    /// <summary>Enum member value "srt".</summary>
    [EnumMember(Value = "srt")] Srt = 0,

    /// <summary>Enum member value "webvtt".</summary>
    [EnumMember(Value = "webvtt")] WebVtt = 1
}
}
37 changes: 37 additions & 0 deletions ElevenLabs-DotNet/Dubbing/DubbingProjectMetadata.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
// Licensed under the MIT License. See LICENSE in the project root for license information.

using System;
using System.Collections.Generic;
using System.Text.Json.Serialization;

namespace ElevenLabs.Dubbing
{
/// <summary>
/// Metadata of a dubbing project as deserialized from the API's JSON, plus two values
/// that are filled in by the library instead of being read from JSON.
/// </summary>
public sealed class DubbingProjectMetadata
{
    /// <summary>Value of the "dubbing_id" JSON property.</summary>
    [JsonInclude, JsonPropertyName("dubbing_id")]
    public string DubbingId { get; private set; }

    /// <summary>Value of the "name" JSON property.</summary>
    [JsonInclude, JsonPropertyName("name")]
    public string Name { get; private set; }

    /// <summary>Value of the "status" JSON property.</summary>
    [JsonInclude, JsonPropertyName("status")]
    public string Status { get; private set; }

    /// <summary>Value of the "target_languages" JSON property.</summary>
    [JsonInclude, JsonPropertyName("target_languages")]
    public List<string> TargetLanguages { get; private set; }

    /// <summary>Value of the "error" JSON property.</summary>
    [JsonInclude, JsonPropertyName("error")]
    public string Error { get; private set; }

    /// <summary>Expected duration in seconds; excluded from JSON and assigned internally.</summary>
    [JsonIgnore]
    public float ExpectedDurationSeconds { get; internal set; }

    /// <summary>Completion time; excluded from JSON and assigned internally.</summary>
    [JsonIgnore]
    public TimeSpan TimeCompleted { get; internal set; }
}
}
Loading