Add a C# sample for post call analytics (#2631)
* Add post-call analytics sample with Azure OpenAI

* Add README. Cleanup

* Add examples to help

* Fix help

* Update Program.cs

* Remove byte order mark
mhko authored Oct 17, 2024
1 parent 4d5c1ac commit eeac4cd
Showing 4 changed files with 184 additions and 6 deletions.
35 changes: 29 additions & 6 deletions scenarios/README.md
@@ -33,8 +33,8 @@ Connection:

Input:

* `--input FILE`: Input audio from file. The default input is the microphone.
* `--format FORMAT`: Use compressed audio format. Valid only with `--file`. Valid values are `alaw`, `any`, `flac`, `mp3`, `mulaw`, and `ogg_opus`. The default value is `any`. To use a `wav` file, don't specify the format. This option is not available with the JavaScript captioning sample. For compressed audio files such as MP4, install GStreamer and see [How to use compressed input audio](~/articles/cognitive-services/speech-service/how-to-use-codec-compressed-audio-input-streams.md).

Language:

@@ -43,7 +43,7 @@ Language:
Recognition:

* `--offline`: Output offline results. Overrides `--realTime`. Default output mode is offline.
* `--realTime`: Output real-time results.

Real-time output includes `Recognizing` event results. The default offline output is `Recognized` event results only. These are always written to the console, never to an output file. The `--quiet` option overrides this. For more information, see [Get speech recognition results](~/articles/cognitive-services/speech-service/get-speech-recognition-results.md).
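
To make the distinction concrete, here is a minimal Speech SDK sketch in C#. It is not taken from the captioning sample itself; it assumes the Microsoft.CognitiveServices.Speech NuGet package, and the key, region, and timing values are placeholders. It only shows where `Recognizing` (partial, real-time) and `Recognized` (final, offline) results surface:

```csharp
using Microsoft.CognitiveServices.Speech;

// Placeholder key and region; the captioning samples read these from options instead.
var config = SpeechConfig.FromSubscription("YourSpeechKey", "YourRegion");
using var recognizer = new SpeechRecognizer(config); // default microphone input

// Real-time mode surfaces partial hypotheses as Recognizing events.
recognizer.Recognizing += (s, e) => Console.WriteLine($"Recognizing: {e.Result.Text}");

// Offline mode (the default) uses only finalized Recognized results.
recognizer.Recognized += (s, e) =>
{
    if (e.Result.Reason == ResultReason.RecognizedSpeech)
    {
        Console.WriteLine($"Recognized: {e.Result.Text}");
    }
};

await recognizer.StartContinuousRecognitionAsync();
await Task.Delay(TimeSpan.FromSeconds(30)); // listen briefly for this sketch
await recognizer.StopContinuousRecognitionAsync();
```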

@@ -64,6 +64,29 @@ Output:
* `--profanity OPTION`: Valid values: raw, remove, mask. For more information, see [Profanity filter](~/articles/cognitive-services/speech-service/display-text-format.md#profanity-filter) concepts.
* `--threshold NUMBER`: Set stable partial result threshold. The default value is `3`. This option is only applicable when you use the `--realTime` flag. For more information, see [Get partial results](~/articles/cognitive-services/speech-service/captioning-concepts.md#get-partial-results) concepts.

## Post-call analytics with Fast Transcription API and Azure OpenAI SDK

Summarize an audio recording using the <a href="https://learn.microsoft.com/azure/ai-services/speech-service/fast-transcription-create" title="Fast Transcription API" target="_blank">Fast Transcription API</a> with the <a href="https://azure.microsoft.com/products/ai-services/openai-service" title="Azure OpenAI" target="_blank">Azure OpenAI</a> SDK. An example invocation is shown after the argument list below.

### Usage and arguments

Connection:

* `--speechKey KEY`: Your <a href="https://portal.azure.com/#create/Microsoft.CognitiveServicesAllInOne" title="Create a Cognitive Services resource" target="_blank">Cognitive Services</a> or <a href="https://portal.azure.com/#create/Microsoft.CognitiveServicesSpeechServices" title="Create a Speech resource" target="_blank">Speech</a> resource key. Required.
* `--speechRegion REGION`: Your <a href="https://portal.azure.com/#create/Microsoft.CognitiveServicesAllInOne" title="Create a Cognitive Services resource" target="_blank">Cognitive Services</a> or <a href="https://portal.azure.com/#create/Microsoft.CognitiveServicesSpeechServices" title="Create a Speech resource" target="_blank">Speech</a> resource region. Required. Examples: `eastus`, `northeurope`

* `--openAiKey KEY`: Your <a href="https://ms.portal.azure.com/#create/Microsoft.CognitiveServicesOpenAI" title="Create an Azure OpenAI resource" target="_blank">Azure OpenAI</a> resource key. Required.
* `--openAiEndpoint ENDPOINT`: Your <a href="https://portal.azure.com/#create/Microsoft.CognitiveServicesAllInOne" title="Create an Azure OpenAI resource" target="_blank">Azure OpenAI</a> resource endpoint. Required. Example: `https://YourResourceName.openai.azure.com`
* `--openAiDeploymentName OPENAIDEPLOYMENTNAME`: Your <a href="https://portal.azure.com/#create/Microsoft.CognitiveServicesAllInOne" title="Create an Azure OpenAI resource" target="_blank">Azure OpenAI</a> deployment name. Required. Example: `my-gpt-4o-mini`

Input:

* `--inputAudio FILE`: Path to the input audio file to transcribe. Required.

Output:

* `--help`: Show the usage help and stop
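
For example, a typical invocation from the sample's project directory might look like the following; the key, region, endpoint, deployment name, and audio file path are placeholder values:

```
dotnet run -- --inputAudio call-recording.wav --speechKey YourSpeechKey --speechRegion eastus --openAiKey YourOpenAiKey --openAiEndpoint https://YourResourceName.openai.azure.com --openAiDeploymentName my-gpt-4o-mini
```

The sample prints the Fast Transcription result followed by the Azure OpenAI summary to the console.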

## Call Center Transcription and Analytics

Visit the [call center transcription quickstart](https://learn.microsoft.com/azure/cognitive-services/speech-service/call-center-quickstart) for a detailed guide on how to get started transcribing call recordings using the Speech and Language Services.
@@ -80,10 +103,10 @@ Connection:

Input:

* `--input URL`: Input audio from URL. You must set either the `--input` or `--jsonInput` option.
* `--jsonInput FILE`: Input an existing batch transcription JSON result from FILE. With this option, you only need a Language resource to process a transcription that you already have. With this option, you don't need an audio file or a Speech resource. Overrides `--input`. You must set either the `--input` or `--jsonInput` option.
* `--stereo`: Indicates that the audio via the `--input` URL should be in stereo format. If stereo isn't specified, then mono 16 kHz 16-bit PCM WAV files are assumed. Diarization of mono files is used to separate multiple speakers. Diarization of stereo files isn't supported, since 2-channel stereo files should already have one speaker per channel.
* `--certificate`: The PEM certificate file. Required for C++.

Language:

@@ -93,4 +116,4 @@ Language:
Output:

* `--help`: Show the usage help and stop
* `--output FILE`: Output the transcription, sentiment, conversation PII, and conversation summaries in JSON format to a text file. For more information, see [output examples](../../../call-center-quickstart.md#check-results).
114 changes: 114 additions & 0 deletions scenarios/csharp/dotnetcore/post-call-analytics/Program.cs
@@ -0,0 +1,114 @@
using Azure.AI.OpenAI;
using OpenAI.Chat;
using System.ClientModel;
using System.CommandLine;
using System.Net.Http.Json;
using System.Text.Json;

namespace PostCallAnalytics
{
    public sealed class FastTranscriptionOptions
    {
        public string[] Locales { get; set; } = { "en-US" };
    }

    public class Program
    {
        internal static async Task<ByteArrayContent> GetAudioContentAsync(FileInfo inputAudio)
        {
            var audioStream = inputAudio.OpenRead();
            byte[] byteArray;
            using (MemoryStream memoryStream = new MemoryStream())
            {
                await audioStream.CopyToAsync(memoryStream);
                byteArray = memoryStream.ToArray();
            }
            return new ByteArrayContent(byteArray);
        }

        internal static async Task<string?> TranscribeAsync(string speechKey, string speechRegion, FileInfo inputAudio)
        {
            var speechEndpoint = $"https://{speechRegion}.api.cognitive.microsoft.com/speechtotext/transcriptions:transcribe?api-version=2024-05-15-preview";

            var httpClient = new HttpClient();
            httpClient.DefaultRequestHeaders.Add("Ocp-Apim-Subscription-Key", speechKey);

            // The request body is multipart form data: a JSON "definition" part plus the audio bytes.
            var multipartFormDataContent = new MultipartFormDataContent();
            var fastTranscriptionOptions = new FastTranscriptionOptions();
            var fastTranscriptionOptionsDefinition = JsonContent.Create(fastTranscriptionOptions);
            multipartFormDataContent.Add(fastTranscriptionOptionsDefinition, "definition");

            var streamContent = await GetAudioContentAsync(inputAudio);
            streamContent.Headers.Add("Content-Type", "multipart/form-data");

            // Speech Fast TranscribeAsync requires a filename to be specified when sending a file.
            multipartFormDataContent.Add(streamContent, "audio", "audio.wav");

            var response = await httpClient.PostAsync(speechEndpoint, multipartFormDataContent, CancellationToken.None);
            Console.WriteLine($"{speechEndpoint} : {response.StatusCode}");

            var content = await response.Content.ReadAsStringAsync();
            var json = JsonDocument.Parse(content).RootElement;

            // Take the full transcript text from the first entry of "combinedPhrases".
            var combinedTranscript = json.GetProperty("combinedPhrases")[0].GetProperty("text").GetString();
            return combinedTranscript;
        }

        internal static async Task<string> SummarizeAsync(string openAiKey, string openAiEndpoint, string deploymentOrModelName, string transcription)
        {
            var azureClient = new AzureOpenAIClient(new Uri(openAiEndpoint), new ApiKeyCredential(openAiKey));
            var chatClient = azureClient.GetChatClient(deploymentOrModelName);

            var completion = await chatClient.CompleteChatAsync(
                [
                    new SystemChatMessage("You are an AI assistant that helps extract information from customer call center transcripts. Summarize the conversation in a couple sentences."),
                    new UserChatMessage(transcription)
                ]
            );

            Console.WriteLine($"{openAiEndpoint} : {completion.GetRawResponse().Status}");

            var summary = completion.Value.Content[0].Text;
            return summary;
        }

        internal static async Task AnalyzeAudioAsync(string speechKey, string speechRegion, FileInfo inputAudio, string openAiKey, string openAiEndpoint, string deploymentOrModelName)
        {
            if (string.IsNullOrEmpty(speechKey) || string.IsNullOrEmpty(speechRegion) || (inputAudio == null || !inputAudio.Exists) || string.IsNullOrEmpty(openAiKey) || string.IsNullOrEmpty(openAiEndpoint) || string.IsNullOrEmpty(deploymentOrModelName))
            {
                Console.WriteLine("Error: missing required option");
                return;
            }

            var transcription = await TranscribeAsync(speechKey, speechRegion, inputAudio);
            Console.WriteLine($"Transcription: {transcription}");

            var summary = await SummarizeAsync(openAiKey, openAiEndpoint, deploymentOrModelName, transcription);
            Console.WriteLine($"Summary: {summary}");
        }

        public async static Task<int> Main(string[] args)
        {
            var inputAudio = new Option<FileInfo>(name: "--inputAudio", description: "Path to the audio file. Required.");
            var speechKey = new Option<string>(name: "--speechKey", description: "Your Cognitive Services or Speech resource key. Required.");
            var speechRegion = new Option<string>(name: "--speechRegion", description: "Your Cognitive Services or Speech resource region. Example: eastus, northeurope. Required.");
            var openAiKey = new Option<string>(name: "--openAiKey", description: "Your Azure OpenAI resource key. Required.");
            var openAiEndpoint = new Option<string>(name: "--openAiEndpoint", description: "Your Azure OpenAI resource endpoint. Required. Example: https://YourResourceName.openai.azure.com");
            var openAiDeploymentName = new Option<string>(name: "--openAiDeploymentName", description: "Your Azure OpenAI deployment name. Example: my-gpt-4o-mini. Required.");

            var rootCommand = new RootCommand()
            {
                inputAudio,
                speechKey,
                speechRegion,
                openAiKey,
                openAiEndpoint,
                openAiDeploymentName
            };

            rootCommand.SetHandler(AnalyzeAudioAsync, speechKey, speechRegion, inputAudio, openAiKey, openAiEndpoint, openAiDeploymentName);

            return await rootCommand.InvokeAsync(args);
        }
    }
}
16 changes: 16 additions & 0 deletions scenarios/csharp/dotnetcore/post-call-analytics/post-call-analytics.csproj
@@ -0,0 +1,16 @@
<Project Sdk="Microsoft.NET.Sdk">

  <PropertyGroup>
    <OutputType>Exe</OutputType>
    <TargetFramework>net8.0</TargetFramework>
    <Nullable>enable</Nullable>
    <ImplicitUsings>enable</ImplicitUsings>
    <RootNamespace>PostCallAnalytics</RootNamespace>
  </PropertyGroup>

  <ItemGroup>
    <PackageReference Include="Azure.AI.OpenAI" Version="2.0.0" />
    <PackageReference Include="System.CommandLine" Version="2.0.0-beta4.22272.1" />
  </ItemGroup>

</Project>
25 changes: 25 additions & 0 deletions scenarios/csharp/dotnetcore/post-call-analytics/post-call-analytics.sln
@@ -0,0 +1,25 @@

Microsoft Visual Studio Solution File, Format Version 12.00
# Visual Studio Version 17
VisualStudioVersion = 17.11.35303.130
MinimumVisualStudioVersion = 10.0.40219.1
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "post-call-analytics", "post-call-analytics.csproj", "{20937E87-0B3F-44C6-8AD1-33E30F67A762}"
EndProject
Global
	GlobalSection(SolutionConfigurationPlatforms) = preSolution
		Debug|Any CPU = Debug|Any CPU
		Release|Any CPU = Release|Any CPU
	EndGlobalSection
	GlobalSection(ProjectConfigurationPlatforms) = postSolution
		{20937E87-0B3F-44C6-8AD1-33E30F67A762}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
		{20937E87-0B3F-44C6-8AD1-33E30F67A762}.Debug|Any CPU.Build.0 = Debug|Any CPU
		{20937E87-0B3F-44C6-8AD1-33E30F67A762}.Release|Any CPU.ActiveCfg = Release|Any CPU
		{20937E87-0B3F-44C6-8AD1-33E30F67A762}.Release|Any CPU.Build.0 = Release|Any CPU
	EndGlobalSection
	GlobalSection(SolutionProperties) = preSolution
		HideSolutionNode = FALSE
	EndGlobalSection
	GlobalSection(ExtensibilityGlobals) = postSolution
		SolutionGuid = {53CEF230-04DF-4757-AFCA-CE4C02ADAAFD}
	EndGlobalSection
EndGlobal
