From 534c9d7736c711389af1a35ba9bb141366d55df2 Mon Sep 17 00:00:00 2001 From: Stephen Hodgson Date: Thu, 26 Oct 2023 14:16:04 -0400 Subject: [PATCH] add partial clip callback --- .../TextToSpeech/TextToSpeechEndpoint.cs | 24 +++++++++++++++++-- ElevenLabs-DotNet/VoiceClip.cs | 4 ++-- 2 files changed, 24 insertions(+), 4 deletions(-) diff --git a/ElevenLabs-DotNet/TextToSpeech/TextToSpeechEndpoint.cs b/ElevenLabs-DotNet/TextToSpeech/TextToSpeechEndpoint.cs index f0a1bc2..31564d1 100644 --- a/ElevenLabs-DotNet/TextToSpeech/TextToSpeechEndpoint.cs +++ b/ElevenLabs-DotNet/TextToSpeech/TextToSpeechEndpoint.cs @@ -48,9 +48,13 @@ public TextToSpeechEndpoint(ElevenLabsClient api) : base(api) { } /// 4 - max latency optimizations, but also with text normalizer turned off for even more latency savings /// (best latency, but can mispronounce eg numbers and dates). /// + /// + /// Optional, Callback to enable streaming audio as it comes in.
+ /// Returns partial VoiceClips whose ClipData is overwritten with the next chunk of data. + /// /// Optional, . /// . - public async Task TextToSpeechAsync(string text, Voice voice, VoiceSettings voiceSettings = null, Model model = null, OutputFormat outputFormat = OutputFormat.MP3_44100_128, int? optimizeStreamingLatency = null, CancellationToken cancellationToken = default) + public async Task TextToSpeechAsync(string text, Voice voice, VoiceSettings voiceSettings = null, Model model = null, OutputFormat outputFormat = OutputFormat.MP3_44100_128, int? optimizeStreamingLatency = null, Func partialClipCallback = null, CancellationToken cancellationToken = default) { if (text.Length > 5000) { @@ -90,7 +94,23 @@ public async Task TextToSpeechAsync(string text, Voice voice, VoiceSe try { - await responseStream.CopyToAsync(memoryStream, cancellationToken).ConfigureAwait(false); + if (partialClipCallback == null) + { + await responseStream.CopyToAsync(memoryStream, cancellationToken).ConfigureAwait(false); + } + else + { + int bytesRead; + var buffer = new byte[8192]; + + while ((bytesRead = await responseStream.ReadAsync(buffer, cancellationToken).ConfigureAwait(false)) > 0) + { + var segment = new ReadOnlyMemory(buffer, 0, bytesRead); + await partialClipCallback(new VoiceClip(clipId, text, voice, segment)).ConfigureAwait(false); + await memoryStream.WriteAsync(segment, cancellationToken).ConfigureAwait(false); + } + } + clipData = memoryStream.ToArray(); } finally diff --git a/ElevenLabs-DotNet/VoiceClip.cs b/ElevenLabs-DotNet/VoiceClip.cs index 27c020a..3f76866 100644 --- a/ElevenLabs-DotNet/VoiceClip.cs +++ b/ElevenLabs-DotNet/VoiceClip.cs @@ -8,13 +8,13 @@ namespace ElevenLabs { public sealed class VoiceClip { - internal VoiceClip(string id, string text, Voice voice, byte[] clipData) + internal VoiceClip(string id, string text, Voice voice, ReadOnlyMemory clipData) { Id = id; Text = text; Voice = voice; TextHash = $"{id}{text}".GenerateGuid().ToString(); - ClipData = new 
ReadOnlyMemory(clipData); + ClipData = clipData; } public string Id { get; }