From 54ddf155431ad803e182fc4e33fd211192c892ce Mon Sep 17 00:00:00 2001 From: Henry van der Vegte Date: Mon, 7 Nov 2022 17:50:57 +0100 Subject: [PATCH] [IngestionClient] Fix database deployment issue - move database deployment to code (EF) (#1728) * move DB migration to code * Update Connector.csproj * continue with DB migration * start with db connector update * continue * fixes * fix migration * fixes * fix SilenceBetweenCurrentAndPreviousSegmentInMs value * small fixes * catch db exceptions * update templates and GH action * remove BOMs * Update guide.md * Update IngestionClientDbContextExtensions.cs * Update DesignTimeSpeechServicesDbContextFactory.cs * Update Program.cs * remove unused references * Delete profile.arm.json --- .github/workflows/ingestion_client.yaml | 11 - .../ingestion-client/BatchIngestionClient.sln | 6 + .../Connector/Connector.csproj | 3 +- .../Database/IngestionClientDbContext.cs | 28 ++ .../IngestionClientDbContextExtensions.cs | 154 ++++++++++ .../Models/CombinedRecognizedPhrase.cs | 58 ++++ .../Connector/Database/Models/DbModelBase.cs | 20 ++ .../Connector/Database/Models/NBest.cs | 58 ++++ .../Database/Models/RecognizedPhrase.cs | 55 ++++ .../Database/Models/Transcription.cs | 62 ++++ .../Connector/Database/Models/Word.cs | 39 +++ .../Connector/DatabaseConnector.cs | 239 ---------------- .../20221107110715_Init.Designer.cs | 270 ++++++++++++++++++ .../Migrations/20221107110715_Init.cs | 173 +++++++++++ .../IngestionClientDbContextModelSnapshot.cs | 268 +++++++++++++++++ .../DatabaseMigrator/DatabaseMigrator.csproj | 17 ++ ...esignTimeSpeechServicesDbContextFactory.cs | 25 ++ .../DatabaseMigrator/Program.cs | 20 ++ .../Database/DatabaseConfigProvider.cs | 38 +++ .../Database/DatabaseInitializationService.cs | 22 ++ .../FetchTranscription/FetchTranscription.cs | 17 +- .../FetchTranscription.csproj | 3 +- .../FetchTranscription/Startup.cs | 35 +++ .../TranscriptionProcessor.cs | 239 +++++++++------- .../Setup/ArmTemplateBatch.json | 22 +- .../Setup/ArmTemplateRealtime.json | 2 +- .../Setup/IngestionClient.bacpac | Bin 4595 -> 0 bytes .../ingestion/ingestion-client/Setup/guide.md | 8 +- 28 files changed, 1504 insertions(+), 388 deletions(-) create mode 100644 samples/ingestion/ingestion-client/Connector/Database/IngestionClientDbContext.cs create mode 100644 samples/ingestion/ingestion-client/Connector/Database/IngestionClientDbContextExtensions.cs create mode 100644 samples/ingestion/ingestion-client/Connector/Database/Models/CombinedRecognizedPhrase.cs create mode 100644 samples/ingestion/ingestion-client/Connector/Database/Models/DbModelBase.cs create mode 100644 samples/ingestion/ingestion-client/Connector/Database/Models/NBest.cs create mode 100644 samples/ingestion/ingestion-client/Connector/Database/Models/RecognizedPhrase.cs create mode 100644 samples/ingestion/ingestion-client/Connector/Database/Models/Transcription.cs create mode 100644 samples/ingestion/ingestion-client/Connector/Database/Models/Word.cs delete mode 100644 samples/ingestion/ingestion-client/Connector/DatabaseConnector.cs create mode 100644 samples/ingestion/ingestion-client/Connector/Migrations/20221107110715_Init.Designer.cs create mode 100644 samples/ingestion/ingestion-client/Connector/Migrations/20221107110715_Init.cs create mode 100644 samples/ingestion/ingestion-client/Connector/Migrations/IngestionClientDbContextModelSnapshot.cs create mode 100644 samples/ingestion/ingestion-client/DatabaseMigrator/DatabaseMigrator.csproj create mode 100644 samples/ingestion/ingestion-client/DatabaseMigrator/DesignTimeSpeechServicesDbContextFactory.cs create mode 100644 samples/ingestion/ingestion-client/DatabaseMigrator/Program.cs create mode 100644 samples/ingestion/ingestion-client/FetchTranscription/Database/DatabaseConfigProvider.cs create mode 100644 samples/ingestion/ingestion-client/FetchTranscription/Database/DatabaseInitializationService.cs create mode 100644 samples/ingestion/ingestion-client/FetchTranscription/Startup.cs delete mode 100644 samples/ingestion/ingestion-client/Setup/IngestionClient.bacpac diff --git a/.github/workflows/ingestion_client.yaml b/.github/workflows/ingestion_client.yaml index 2093274f1..d0455e6f7 100644 --- a/.github/workflows/ingestion_client.yaml +++ b/.github/workflows/ingestion_client.yaml @@ -113,14 +113,3 @@ jobs: asset_path: ./RealtimeTranscription.zip asset_name: RealtimeTranscription.zip asset_content_type: application/zip - - - name: Upload Bacpac File - id: upload-release-asset-4 - uses: actions/upload-release-asset@v1 - env: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - with: - upload_url: ${{ steps.create_release.outputs.upload_url }} - asset_path: ./samples/ingestion/ingestion-client/Setup/IngestionClient.bacpac - asset_name: IngestionClient.bacpac - asset_content_type: application/octet-stream \ No newline at end of file diff --git a/samples/ingestion/ingestion-client/BatchIngestionClient.sln b/samples/ingestion/ingestion-client/BatchIngestionClient.sln index daf4e6102..e1cb1f1f1 100644 --- a/samples/ingestion/ingestion-client/BatchIngestionClient.sln +++ b/samples/ingestion/ingestion-client/BatchIngestionClient.sln @@ -14,6 +14,8 @@ Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "StartTranscriptionByTimer", EndProject Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "RealtimeTranscription", "RealtimeTranscription\RealtimeTranscription.csproj", "{5B4B7645-41AD-4951-AA06-44DF94CDEB8D}" EndProject +Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "DatabaseMigrator", "DatabaseMigrator\DatabaseMigrator.csproj", "{5BD38646-D3F3-481B-909E-353750AC5384}" +EndProject Global GlobalSection(SolutionConfigurationPlatforms) = preSolution Debug|Any CPU = Debug|Any CPU @@ -44,6 +46,10 @@ Global {5B4B7645-41AD-4951-AA06-44DF94CDEB8D}.Debug|Any CPU.Build.0 = Debug|Any CPU {5B4B7645-41AD-4951-AA06-44DF94CDEB8D}.Release|Any CPU.ActiveCfg = Release|Any CPU {5B4B7645-41AD-4951-AA06-44DF94CDEB8D}.Release|Any CPU.Build.0 = Release|Any CPU + {5BD38646-D3F3-481B-909E-353750AC5384}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + {5BD38646-D3F3-481B-909E-353750AC5384}.Debug|Any CPU.Build.0 = Debug|Any CPU + {5BD38646-D3F3-481B-909E-353750AC5384}.Release|Any CPU.ActiveCfg = Release|Any CPU + {5BD38646-D3F3-481B-909E-353750AC5384}.Release|Any CPU.Build.0 = Release|Any CPU EndGlobalSection GlobalSection(SolutionProperties) = preSolution HideSolutionNode = FALSE diff --git a/samples/ingestion/ingestion-client/Connector/Connector.csproj b/samples/ingestion/ingestion-client/Connector/Connector.csproj index 5163aca8d..1a6daf0c9 100644 --- a/samples/ingestion/ingestion-client/Connector/Connector.csproj +++ b/samples/ingestion/ingestion-client/Connector/Connector.csproj @@ -6,7 +6,8 @@ + + - diff --git a/samples/ingestion/ingestion-client/Connector/Database/IngestionClientDbContext.cs b/samples/ingestion/ingestion-client/Connector/Database/IngestionClientDbContext.cs new file mode 100644 index 000000000..8ecc0766a --- /dev/null +++ b/samples/ingestion/ingestion-client/Connector/Database/IngestionClientDbContext.cs @@ -0,0 +1,28 @@ +// +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT license. See LICENSE.md file in the project root for full license information. +// + +namespace Connector.Database +{ + using Connector.Database.Models; + using Microsoft.EntityFrameworkCore; + + public class IngestionClientDbContext : DbContext + { + public IngestionClientDbContext(DbContextOptions options) + : base(options) + { + } + + public DbSet Transcriptions { get; set; } + + public DbSet CombinedRecognizedPhrases { get; set; } + + public DbSet NBests { get; set; } + + public DbSet RecognizedPhrases { get; set; } + + public DbSet Words { get; set; } + } +} diff --git a/samples/ingestion/ingestion-client/Connector/Database/IngestionClientDbContextExtensions.cs b/samples/ingestion/ingestion-client/Connector/Database/IngestionClientDbContextExtensions.cs new file mode 100644 index 000000000..d9c4e046d --- /dev/null +++ b/samples/ingestion/ingestion-client/Connector/Database/IngestionClientDbContextExtensions.cs @@ -0,0 +1,154 @@ +// +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT license. See LICENSE.md file in the project root for full license information. +// + +namespace Connector.Database +{ + using System; + using System.Collections.Generic; + using System.Globalization; + using System.Linq; + using System.Threading.Tasks; + + using Connector.Database.Models; + + public static class IngestionClientDbContextExtensions + { + private const int MaxNBestsPerRecognizedPhrase = 1; + + public static async Task StoreTranscriptionAsync( + this IngestionClientDbContext ingestionClientDbContext, + Guid transcriptionId, + string locale, + string fileName, + float approximateCost, + SpeechTranscript speechTranscript) + { + _ = ingestionClientDbContext ?? throw new ArgumentNullException(nameof(ingestionClientDbContext)); + _ = speechTranscript ?? throw new ArgumentNullException(nameof(speechTranscript)); + + var transcription = new Transcription( + id: transcriptionId, + locale: locale, + name: fileName, + source: speechTranscript.Source, + timestamp: DateTime.Parse(speechTranscript.Timestamp, CultureInfo.InvariantCulture), + duration: speechTranscript.Duration ?? string.Empty, + durationInSeconds: TimeSpan.FromTicks(speechTranscript.DurationInTicks).TotalSeconds, + numberOfChannels: speechTranscript.CombinedRecognizedPhrases.Count(), + approximateCost: approximateCost); + var combinedRecognizedPhrases = new List(); + + var phrasesByChannel = speechTranscript.RecognizedPhrases.GroupBy(t => t.Channel); + + foreach (var phrases in phrasesByChannel) + { + var channel = phrases.Key; + var combinedPhrase = speechTranscript.CombinedRecognizedPhrases.Where(t => t.Channel == channel).FirstOrDefault(); + var combinedRecognizedPhraseDb = AddCombinedRecognizedPhrase(combinedPhrase, channel, phrases); + combinedRecognizedPhrases.Add(combinedRecognizedPhraseDb); + } + + transcription = transcription.WithCombinedRecognizedPhrases(combinedRecognizedPhrases); + + ingestionClientDbContext.Add(transcription); + var entitiesAdded = await ingestionClientDbContext.SaveChangesAsync().ConfigureAwait(false); + } + + private static CombinedRecognizedPhrase AddCombinedRecognizedPhrase(Connector.CombinedRecognizedPhrase combinedRecognizedPhrase, int channel, IEnumerable recognizedPhrases) + { + var combinedRecognizedPhraseDb = new CombinedRecognizedPhrase( + id: Guid.NewGuid(), + channel: channel, + lexical: combinedRecognizedPhrase?.Lexical ?? string.Empty, + itn: combinedRecognizedPhrase?.Lexical ?? string.Empty, + maskedItn: combinedRecognizedPhrase?.Lexical ?? string.Empty, + display: combinedRecognizedPhrase?.Lexical ?? string.Empty, + sentimentNegative: combinedRecognizedPhrase?.Sentiment?.Negative ?? 0d, + sentimentNeutral: combinedRecognizedPhrase?.Sentiment?.Neutral ?? 0d, + sentimentPositive: combinedRecognizedPhrase?.Sentiment?.Positive ?? 0d); + + var recognizedPhrasesDb = new List(); + + var orderedPhrases = recognizedPhrases.OrderBy(p => p.OffsetInTicks); + var previousEndInMs = 0.0; + foreach (var phrase in orderedPhrases) + { + var silenceBetweenCurrentAndPreviousSegmentInMs = Convert.ToInt32(Math.Max(0, TimeSpan.FromTicks(phrase.OffsetInTicks).TotalMilliseconds - previousEndInMs)); + + var recognizedPhraseDb = AddRecognizedPhrase(phrase, silenceBetweenCurrentAndPreviousSegmentInMs); + previousEndInMs = (TimeSpan.FromTicks(phrase.OffsetInTicks) + TimeSpan.FromTicks(phrase.DurationInTicks)).TotalMilliseconds; + + recognizedPhrasesDb.Add(recognizedPhraseDb); + } + + combinedRecognizedPhraseDb = combinedRecognizedPhraseDb.WithRecognizedPhrases(recognizedPhrasesDb); + return combinedRecognizedPhraseDb; + } + + private static RecognizedPhrase AddRecognizedPhrase(Connector.RecognizedPhrase recognizedPhrase, int silenceBetweenCurrentAndPreviousSegmentInMs) + { + var recognizedPhraseDb = new RecognizedPhrase( + id: Guid.NewGuid(), + recognitionStatus: recognizedPhrase.RecognitionStatus, + speaker: recognizedPhrase.Speaker, + channel: recognizedPhrase.Channel, + offset: recognizedPhrase.Offset, + duration: recognizedPhrase.Duration, + silenceBetweenCurrentAndPreviousSegmentInMs: silenceBetweenCurrentAndPreviousSegmentInMs); + + var nbestsDb = new List(); + + foreach (var nbestResult in recognizedPhrase.NBest.Take(MaxNBestsPerRecognizedPhrase)) + { + var nbestDb = AddNBestResult(nbestResult); + nbestsDb.Add(nbestDb); + } + + recognizedPhraseDb = recognizedPhraseDb.WithNBests(nbestsDb); + return recognizedPhraseDb; + } + + private static NBest AddNBestResult(Connector.NBest nbest) + { + var nbestDb = new NBest( + id: Guid.NewGuid(), + confidence: nbest.Confidence, + lexical: nbest.Lexical, + itn: nbest.ITN, + maskedItn: nbest.MaskedITN, + display: nbest.Display, + sentimentNegative: nbest.Sentiment?.Negative ?? 0d, + sentimentNeutral: nbest.Sentiment?.Neutral ?? 0d, + sentimentPositive: nbest.Sentiment?.Positive ?? 0d); + + if (nbest.Words != null) + { + var wordsDb = new List(); + + foreach (var word in nbest.Words) + { + var wordDb = CreateWord(word); + wordsDb.Add(wordDb); + } + + nbestDb = nbestDb.WithWords(wordsDb); + } + + return nbestDb; + } + + private static Word CreateWord(Connector.Words word) + { + var wordDb = new Word( + id: Guid.NewGuid(), + wordText: word.Word, + offset: word.Offset, + duration: word.Duration, + confidence: word.Confidence); + + return wordDb; + } + } +} \ No newline at end of file diff --git a/samples/ingestion/ingestion-client/Connector/Database/Models/CombinedRecognizedPhrase.cs b/samples/ingestion/ingestion-client/Connector/Database/Models/CombinedRecognizedPhrase.cs new file mode 100644 index 000000000..a46e15217 --- /dev/null +++ b/samples/ingestion/ingestion-client/Connector/Database/Models/CombinedRecognizedPhrase.cs @@ -0,0 +1,58 @@ +// +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT license. See LICENSE.md file in the project root for full license information. +// + +namespace Connector.Database.Models +{ + using System; + using System.Collections.Generic; + using System.ComponentModel.DataAnnotations; + using System.ComponentModel.DataAnnotations.Schema; + + [System.Diagnostics.CodeAnalysis.SuppressMessage("Usage", "CA2227:Collection properties should be read only", Justification = "Used by Entity Framework")] + public class CombinedRecognizedPhrase : DbModelBase + { + public CombinedRecognizedPhrase(Guid id, int channel, string lexical, string itn, string maskedItn, string display, double sentimentNegative, double sentimentNeutral, double sentimentPositive) + { + this.Id = id; + this.Channel = channel; + this.Lexical = lexical; + this.Itn = itn; + this.MaskedItn = maskedItn; + this.Display = display; + this.SentimentNegative = sentimentNegative; + this.SentimentNeutral = sentimentNeutral; + this.SentimentPositive = sentimentPositive; + } + + [Column("ID")] + [Key] + public Guid Id { get; set; } + + public int Channel { get; private set; } + + public string Lexical { get; private set; } + + public string Itn { get; private set; } + + public string MaskedItn { get; private set; } + + public string Display { get; private set; } + + public double SentimentNegative { get; private set; } + + public double SentimentNeutral { get; private set; } + + public double SentimentPositive { get; private set; } + + [ForeignKey("CombinedRecognizedPhraseID")] + public ICollection RecognizedPhrases { get; set; } + + public CombinedRecognizedPhrase WithRecognizedPhrases(ICollection recognizedPhrases) + { + this.RecognizedPhrases = recognizedPhrases; + return this; + } + } +} diff --git a/samples/ingestion/ingestion-client/Connector/Database/Models/DbModelBase.cs b/samples/ingestion/ingestion-client/Connector/Database/Models/DbModelBase.cs new file mode 100644 index 000000000..e6e230d53 --- /dev/null +++ b/samples/ingestion/ingestion-client/Connector/Database/Models/DbModelBase.cs @@ -0,0 +1,20 @@ +// +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT license. See LICENSE.md file in the project root for full license information. +// + +namespace Connector.Database.Models +{ + public abstract class DbModelBase + { + public const int MaxTimeSpanColumnLength = 255; + + public const int MaxLocaleLength = 255; + + public const int MaxDefaultStringLength = 500; + + public const int MaxWordLength = 511; + + public const int MaxStateLength = 32; + } +} diff --git a/samples/ingestion/ingestion-client/Connector/Database/Models/NBest.cs b/samples/ingestion/ingestion-client/Connector/Database/Models/NBest.cs new file mode 100644 index 000000000..df99cc386 --- /dev/null +++ b/samples/ingestion/ingestion-client/Connector/Database/Models/NBest.cs @@ -0,0 +1,58 @@ +// +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT license. See LICENSE.md file in the project root for full license information. +// + +namespace Connector.Database.Models +{ + using System; + using System.Collections.Generic; + using System.ComponentModel.DataAnnotations; + using System.ComponentModel.DataAnnotations.Schema; + + [System.Diagnostics.CodeAnalysis.SuppressMessage("Usage", "CA2227:Collection properties should be read only", Justification = "Used by Entity Framework")] + public class NBest : DbModelBase + { + public NBest(Guid id, double confidence, string lexical, string itn, string maskedItn, string display, double sentimentNegative, double sentimentNeutral, double sentimentPositive) + { + this.Id = id; + this.Confidence = confidence; + this.Lexical = lexical; + this.Itn = itn; + this.MaskedItn = maskedItn; + this.Display = display; + this.SentimentNegative = sentimentNegative; + this.SentimentNeutral = sentimentNeutral; + this.SentimentPositive = sentimentPositive; + } + + [Column("ID")] + [Key] + public Guid Id { get; set; } + + public double Confidence { get; private set; } + + public string Lexical { get; private set; } + + public string Itn { get; private set; } + + public string MaskedItn { get; private set; } + + public string Display { get; private set; } + + public double SentimentNegative { get; private set; } + + public double SentimentNeutral { get; private set; } + + public double SentimentPositive { get; private set; } + + [ForeignKey("NBestID")] + public ICollection Words { get; set; } + + public NBest WithWords(ICollection words) + { + this.Words = words; + return this; + } + } +} diff --git a/samples/ingestion/ingestion-client/Connector/Database/Models/RecognizedPhrase.cs b/samples/ingestion/ingestion-client/Connector/Database/Models/RecognizedPhrase.cs new file mode 100644 index 000000000..d65f9f554 --- /dev/null +++ b/samples/ingestion/ingestion-client/Connector/Database/Models/RecognizedPhrase.cs @@ -0,0 +1,55 @@ +// +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT license. See LICENSE.md file in the project root for full license information. +// + +namespace Connector.Database.Models +{ + using System; + using System.Collections.Generic; + using System.ComponentModel.DataAnnotations; + using System.ComponentModel.DataAnnotations.Schema; + + [System.Diagnostics.CodeAnalysis.SuppressMessage("Usage", "CA2227:Collection properties should be read only", Justification = "Used by Entity Framework")] + public class RecognizedPhrase : DbModelBase + { + public RecognizedPhrase(Guid id, string recognitionStatus, int speaker, int channel, string offset, string duration, int silenceBetweenCurrentAndPreviousSegmentInMs) + { + this.Id = id; + this.RecognitionStatus = recognitionStatus; + this.Speaker = speaker; + this.Channel = channel; + this.Offset = offset; + this.Duration = duration; + this.SilenceBetweenCurrentAndPreviousSegmentInMs = silenceBetweenCurrentAndPreviousSegmentInMs; + } + + [Column("ID")] + [Key] + public Guid Id { get; set; } + + [StringLength(MaxStateLength)] + public string RecognitionStatus { get; private set; } + + public int Speaker { get; private set; } + + public int Channel { get; private set; } + + [StringLength(MaxTimeSpanColumnLength)] + public string Offset { get; private set; } + + [StringLength(MaxTimeSpanColumnLength)] + public string Duration { get; private set; } + + public int SilenceBetweenCurrentAndPreviousSegmentInMs { get; private set; } + + [ForeignKey("RecognizedPhraseID")] + public ICollection NBests { get; set; } + + public RecognizedPhrase WithNBests(ICollection nbests) + { + this.NBests = nbests; + return this; + } + } +} diff --git a/samples/ingestion/ingestion-client/Connector/Database/Models/Transcription.cs b/samples/ingestion/ingestion-client/Connector/Database/Models/Transcription.cs new file mode 100644 index 000000000..956e60934 --- /dev/null +++ b/samples/ingestion/ingestion-client/Connector/Database/Models/Transcription.cs @@ -0,0 +1,62 @@ +// +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT license. See LICENSE.md file in the project root for full license information. +// + +namespace Connector.Database.Models +{ + using System; + using System.Collections.Generic; + using System.ComponentModel.DataAnnotations; + using System.ComponentModel.DataAnnotations.Schema; + + [System.Diagnostics.CodeAnalysis.SuppressMessage("Usage", "CA2227:Collection properties should be read only", Justification = "Used by Entity Framework")] + public class Transcription : DbModelBase + { + public Transcription(Guid id, string locale, string name, string source, DateTime timestamp, string duration, double durationInSeconds, int numberOfChannels, float approximateCost) + { + this.Id = id; + this.Locale = locale; + this.Name = name; + this.Source = source; + this.Timestamp = timestamp; + this.Duration = duration; + this.DurationInSeconds = durationInSeconds; + this.NumberOfChannels = numberOfChannels; + this.ApproximateCost = approximateCost; + } + + [Column("ID")] + [Key] + public Guid Id { get; set; } + + [StringLength(MaxLocaleLength)] + public string Locale { get; private set; } + + [StringLength(MaxDefaultStringLength)] + public string Name { get; private set; } + + [StringLength(MaxDefaultStringLength)] + public string Source { get; private set; } + + public DateTime Timestamp { get; private set; } + + [StringLength(MaxTimeSpanColumnLength)] + public string Duration { get; private set; } + + public double DurationInSeconds { get; private set; } + + public int NumberOfChannels { get; private set; } + + public float ApproximateCost { get; private set; } + + [ForeignKey("TranscriptionID")] + public ICollection CombinedRecognizedPhrases { get; set; } + + public Transcription WithCombinedRecognizedPhrases(ICollection combinedRecognizedPhrases) + { + this.CombinedRecognizedPhrases = combinedRecognizedPhrases; + return this; + } + } +} diff --git a/samples/ingestion/ingestion-client/Connector/Database/Models/Word.cs b/samples/ingestion/ingestion-client/Connector/Database/Models/Word.cs new file mode 100644 index 000000000..8275bf85f --- /dev/null +++ b/samples/ingestion/ingestion-client/Connector/Database/Models/Word.cs @@ -0,0 +1,39 @@ +// +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT license. See LICENSE.md file in the project root for full license information. +// + +namespace Connector.Database.Models +{ + using System; + using System.ComponentModel.DataAnnotations; + using System.ComponentModel.DataAnnotations.Schema; + + public class Word : DbModelBase + { + public Word(Guid id, string wordText, string offset, string duration, double confidence) + { + this.Id = id; + this.WordText = wordText; + this.Offset = offset; + this.Duration = duration; + this.Confidence = confidence; + } + + [Column("ID")] + [Key] + public Guid Id { get; set; } + + [Column("Word")] + [StringLength(MaxWordLength)] + public string WordText { get; private set; } + + [StringLength(MaxTimeSpanColumnLength)] + public string Offset { get; private set; } + + [StringLength(MaxTimeSpanColumnLength)] + public string Duration { get; private set; } + + public double Confidence { get; private set; } + } +} diff --git a/samples/ingestion/ingestion-client/Connector/DatabaseConnector.cs b/samples/ingestion/ingestion-client/Connector/DatabaseConnector.cs deleted file mode 100644 index b4d599223..000000000 --- a/samples/ingestion/ingestion-client/Connector/DatabaseConnector.cs +++ /dev/null @@ -1,239 +0,0 @@ -// -// Copyright (c) Microsoft Corporation. All rights reserved. -// Licensed under the MIT license. See LICENSE.md file in the project root for full license information. -// - -namespace Connector -{ - using System; - using System.Collections.Generic; - using System.Data.SqlClient; - using System.Linq; - using System.Threading.Tasks; - using Microsoft.Extensions.Logging; - - public class DatabaseConnector : IDisposable - { - private readonly ILogger logger; - - private readonly string databaseConnectionString; - - private SqlConnection connection; - - public DatabaseConnector(ILogger logger, string databaseConnectionString) - { - this.logger = logger; - this.databaseConnectionString = databaseConnectionString; - } - - public async Task StoreTranscriptionAsync( - Guid transcriptionId, - string locale, - string fileName, - float approximateCost, - SpeechTranscript speechTranscript) - { - if (speechTranscript == null) - { - throw new ArgumentNullException(nameof(speechTranscript)); - } - - try - { - this.connection = new SqlConnection(this.databaseConnectionString); - this.connection.Open(); - - var query = "INSERT INTO dbo.Transcriptions (ID, Locale, Name, Source, Timestamp, Duration, DurationInSeconds, NumberOfChannels, ApproximateCost)" + - " VALUES (@id, @locale, @name, @source, @timestamp, @duration, @durationInSeconds, @numberOfChannels, @approximateCost)"; - - using (var command = new SqlCommand(query, this.connection)) - { - command.Parameters.AddWithValue("@id", transcriptionId); - command.Parameters.AddWithValue("@locale", locale); - command.Parameters.AddWithValue("@name", fileName); - command.Parameters.AddWithValue("@source", speechTranscript.Source); - command.Parameters.AddWithValue("@timestamp", speechTranscript.Timestamp); - command.Parameters.AddWithValue("@duration", speechTranscript.Duration ?? string.Empty); - command.Parameters.AddWithValue("@durationInSeconds", TimeSpan.FromTicks(speechTranscript.DurationInTicks).TotalSeconds); - command.Parameters.AddWithValue("@numberOfChannels", speechTranscript.CombinedRecognizedPhrases.Count()); - command.Parameters.AddWithValue("@approximateCost", approximateCost); - - var result = await command.ExecuteNonQueryAsync().ConfigureAwait(false); - - if (result < 0) - { - this.logger.LogInformation("Did not store json in Db, command did not update table"); - } - else - { - var phrasesByChannel = speechTranscript.RecognizedPhrases.GroupBy(t => t.Channel); - - foreach (var phrases in phrasesByChannel) - { - var channel = phrases.Key; - await this.StoreCombinedRecognizedPhrasesAsync(transcriptionId, channel, speechTranscript, phrases).ConfigureAwait(false); - } - } - } - - this.connection.Close(); - } - catch (SqlException e) - { - this.logger.LogInformation(e.ToString()); - return false; - } - - return true; - } - - public void Dispose() - { - this.Dispose(true); - GC.SuppressFinalize(this); - } - - protected virtual void Dispose(bool disposing) - { - this.logger.LogInformation("Disposing DBConnector"); - if (disposing) - { - this.connection?.Dispose(); - } - } - - private async Task StoreCombinedRecognizedPhrasesAsync(Guid transcriptionId, int channel, SpeechTranscript speechTranscript, IEnumerable recognizedPhrases) - { - var combinedRecognizedPhraseID = Guid.NewGuid(); - - var combinedPhrases = speechTranscript.CombinedRecognizedPhrases.Where(t => t.Channel == channel).FirstOrDefault(); - - var query = "INSERT INTO dbo.CombinedRecognizedPhrases (ID, TranscriptionID, Channel, Lexical, Itn, MaskedItn, Display, SentimentPositive, SentimentNeutral, SentimentNegative)" + - " VALUES (@id, @transcriptionID, @channel, @lexical, @itn, @maskedItn, @display, @sentimentPositive, @sentimentNeutral, @sentimentNegative)"; - - using var command = new SqlCommand(query, this.connection); - command.Parameters.AddWithValue("@id", combinedRecognizedPhraseID); - command.Parameters.AddWithValue("@transcriptionID", transcriptionId); - command.Parameters.AddWithValue("@channel", channel); - - command.Parameters.AddWithValue("@lexical", combinedPhrases?.Lexical ?? string.Empty); - command.Parameters.AddWithValue("@itn", combinedPhrases?.ITN ?? string.Empty); - command.Parameters.AddWithValue("@maskedItn", combinedPhrases?.MaskedITN ?? string.Empty); - command.Parameters.AddWithValue("@display", combinedPhrases?.Display ?? string.Empty); - - command.Parameters.AddWithValue("@sentimentPositive", combinedPhrases?.Sentiment?.Positive ?? 0f); - command.Parameters.AddWithValue("@sentimentNeutral", combinedPhrases?.Sentiment?.Neutral ?? 0f); - command.Parameters.AddWithValue("@sentimentNegative", combinedPhrases?.Sentiment?.Negative ?? 0f); - - var result = await command.ExecuteNonQueryAsync().ConfigureAwait(false); - - if (result < 0) - { - this.logger.LogInformation("Did not store combined phrase in Db, command did not update table"); - } - else - { - var orderedPhrases = recognizedPhrases.OrderBy(p => p.OffsetInTicks); - var previousEndInMs = 0.0; - foreach (var phrase in orderedPhrases) - { - await this.StoreRecognizedPhraseAsync(combinedRecognizedPhraseID, phrase, previousEndInMs).ConfigureAwait(false); - previousEndInMs = (TimeSpan.FromTicks(phrase.OffsetInTicks) + TimeSpan.FromTicks(phrase.DurationInTicks)).TotalMilliseconds; - } - } - } - - private async Task StoreRecognizedPhraseAsync(Guid combinedPhraseID, RecognizedPhrase recognizedPhrase, double previousEndInMs) - { - var silenceBetweenCurrentAndPreviousSegmentInMs = Math.Max(0, TimeSpan.FromTicks(recognizedPhrase.OffsetInTicks).TotalMilliseconds - previousEndInMs); - - var phraseId = Guid.NewGuid(); - var query = "INSERT INTO dbo.RecognizedPhrases (ID, CombinedRecognizedPhraseID, RecognitionStatus, Speaker, Channel, Offset, Duration, SilenceBetweenCurrentAndPreviousSegmentInMs)" + - " VALUES (@id, @combinedRecognizedPhraseID, @recognitionStatus, @speaker, @channel, @offset, @duration, @silenceBetweenCurrentAndPreviousSegmentInMs)"; - - using var command = new SqlCommand(query, this.connection); - command.Parameters.AddWithValue("@id", phraseId); - command.Parameters.AddWithValue("@combinedRecognizedPhraseID", combinedPhraseID); - command.Parameters.AddWithValue("@recognitionStatus", recognizedPhrase.RecognitionStatus); - command.Parameters.AddWithValue("@speaker", recognizedPhrase.Speaker); - command.Parameters.AddWithValue("@channel", recognizedPhrase.Channel); - command.Parameters.AddWithValue("@offset", recognizedPhrase.Offset); - command.Parameters.AddWithValue("@duration", recognizedPhrase.Duration); - command.Parameters.AddWithValue("@silenceBetweenCurrentAndPreviousSegmentInMs", silenceBetweenCurrentAndPreviousSegmentInMs); - - var result = await command.ExecuteNonQueryAsync().ConfigureAwait(false); - - if (result < 0) - { - this.logger.LogInformation("Did not store phrase in Db, command did not update table"); - } - else - { - foreach (var nbestResult in recognizedPhrase.NBest) - { - await this.StoreNBestAsync(phraseId, nbestResult).ConfigureAwait(false); - } - } - } - - private async Task StoreNBestAsync(Guid recognizedPhraseID, NBest nbest) - { - var nbestID = Guid.NewGuid(); - var query = "INSERT INTO dbo.NBests (ID, RecognizedPhraseID, Confidence, Lexical, Itn, MaskedItn, Display, SentimentNegative, SentimentNeutral, SentimentPositive)" + - " VALUES (@id, @recognizedPhraseID, @confidence, @lexical, @itn, @maskedItn, @display, @sentimentNegative, @sentimentNeutral, @sentimentPositive)"; - - using var command = new SqlCommand(query, this.connection); - command.Parameters.AddWithValue("@id", nbestID); - command.Parameters.AddWithValue("@recognizedPhraseID", recognizedPhraseID); - command.Parameters.AddWithValue("@confidence", nbest.Confidence); - command.Parameters.AddWithValue("@lexical", nbest.Lexical); - command.Parameters.AddWithValue("@itn", nbest.ITN); - command.Parameters.AddWithValue("@maskedItn", nbest.MaskedITN); - command.Parameters.AddWithValue("@display", nbest.Display); - - command.Parameters.AddWithValue("@sentimentNegative", nbest?.Sentiment?.Negative ?? 0f); - command.Parameters.AddWithValue("@sentimentNeutral", nbest?.Sentiment?.Neutral ?? 0f); - command.Parameters.AddWithValue("@sentimentPositive", nbest?.Sentiment?.Positive ?? 0f); - - var result = await command.ExecuteNonQueryAsync().ConfigureAwait(false); - - if (result < 0) - { - this.logger.LogInformation("Did not store nbest in Db, command did not update table"); - } - else - { - if (nbest.Words == null) - { - return; - } - - foreach (var word in nbest.Words) - { - await this.StoreWordsAsync(nbestID, word).ConfigureAwait(false); - } - } - } - - private async Task StoreWordsAsync(Guid nbestId, Words word) - { - var wordID = Guid.NewGuid(); - var query = "INSERT INTO dbo.Words (ID, NBestID, Word, Offset, Duration, Confidence)" + - " VALUES (@id, @nBestID, @word, @offset, @duration, @confidence)"; - - using var command = new SqlCommand(query, this.connection); - command.Parameters.AddWithValue("@id", wordID); - command.Parameters.AddWithValue("@nBestID", nbestId); - command.Parameters.AddWithValue("@word", word.Word); - command.Parameters.AddWithValue("@offset", word.Offset); - command.Parameters.AddWithValue("@duration", word.Duration); - command.Parameters.AddWithValue("@confidence", word.Confidence); - - var result = await command.ExecuteNonQueryAsync().ConfigureAwait(false); - if (result < 0) - { - this.logger.LogInformation("Did not Store word result in Db, command did not update table"); - } - } - } -} \ No newline at end of file diff --git a/samples/ingestion/ingestion-client/Connector/Migrations/20221107110715_Init.Designer.cs b/samples/ingestion/ingestion-client/Connector/Migrations/20221107110715_Init.Designer.cs new file mode 100644 index 000000000..8d18e46fd --- /dev/null +++ b/samples/ingestion/ingestion-client/Connector/Migrations/20221107110715_Init.Designer.cs @@ -0,0 +1,270 @@ +// +using System; +using Connector.Database; +using Microsoft.EntityFrameworkCore; +using Microsoft.EntityFrameworkCore.Infrastructure; +using Microsoft.EntityFrameworkCore.Metadata; +using Microsoft.EntityFrameworkCore.Migrations; +using Microsoft.EntityFrameworkCore.Storage.ValueConversion; + +#nullable disable + +namespace Connector.Migrations +{ + [DbContext(typeof(IngestionClientDbContext))] + [Migration("20221107110715_Init")] + partial class Init + { + protected override void BuildTargetModel(ModelBuilder modelBuilder) + { +#pragma warning disable 612, 618 + modelBuilder + .HasAnnotation("ProductVersion", "6.0.10") + .HasAnnotation("Relational:MaxIdentifierLength", 128); + + SqlServerModelBuilderExtensions.UseIdentityColumns(modelBuilder, 1L, 1); + + modelBuilder.Entity("Connector.Database.Models.CombinedRecognizedPhrase", b => + { + b.Property("Id") + .ValueGeneratedOnAdd() + .HasColumnType("uniqueidentifier") + .HasColumnName("ID"); + + b.Property("Channel") + .HasColumnType("int"); + + b.Property("Display") + .HasColumnType("nvarchar(max)"); + + b.Property("Itn") + .HasColumnType("nvarchar(max)"); + + b.Property("Lexical") + .HasColumnType("nvarchar(max)"); + + b.Property("MaskedItn") + .HasColumnType("nvarchar(max)"); + + b.Property("SentimentNegative") + .HasColumnType("float"); + + b.Property("SentimentNeutral") + .HasColumnType("float"); + + b.Property("SentimentPositive") + .HasColumnType("float"); + + b.Property("TranscriptionID") + .HasColumnType("uniqueidentifier"); + + b.HasKey("Id"); + + b.HasIndex("TranscriptionID"); + + b.ToTable("CombinedRecognizedPhrases"); + }); + + modelBuilder.Entity("Connector.Database.Models.NBest", b => + { + b.Property("Id") + .ValueGeneratedOnAdd() + .HasColumnType("uniqueidentifier") + .HasColumnName("ID"); + + b.Property("Confidence") + .HasColumnType("float"); + + b.Property("Display") + .HasColumnType("nvarchar(max)"); + + b.Property("Itn") + .HasColumnType("nvarchar(max)"); + + b.Property("Lexical") + .HasColumnType("nvarchar(max)"); + + b.Property("MaskedItn") + .HasColumnType("nvarchar(max)"); + + b.Property("RecognizedPhraseID") + .HasColumnType("uniqueidentifier"); + + b.Property("SentimentNegative") + .HasColumnType("float"); + + b.Property("SentimentNeutral") + .HasColumnType("float"); + + b.Property("SentimentPositive") + .HasColumnType("float"); + + b.HasKey("Id"); + + b.HasIndex("RecognizedPhraseID"); + + b.ToTable("NBests"); + }); + + modelBuilder.Entity("Connector.Database.Models.RecognizedPhrase", b => + { + b.Property("Id") + .ValueGeneratedOnAdd() + .HasColumnType("uniqueidentifier") + .HasColumnName("ID"); + + b.Property("Channel") + .HasColumnType("int"); + + b.Property("CombinedRecognizedPhraseID") + .HasColumnType("uniqueidentifier"); + + b.Property("Duration") + .HasMaxLength(255) + .HasColumnType("nvarchar(255)"); + + b.Property("Offset") + .HasMaxLength(255) + .HasColumnType("nvarchar(255)"); + + b.Property("RecognitionStatus") + .HasMaxLength(32) + .HasColumnType("nvarchar(32)"); + + b.Property("SilenceBetweenCurrentAndPreviousSegmentInMs") + .HasColumnType("int"); + + b.Property("Speaker") + .HasColumnType("int"); + + b.HasKey("Id"); + + b.HasIndex("CombinedRecognizedPhraseID"); + + b.ToTable("RecognizedPhrases"); + }); + + modelBuilder.Entity("Connector.Database.Models.Transcription", b => + { + b.Property("Id") + .ValueGeneratedOnAdd() + .HasColumnType("uniqueidentifier") + .HasColumnName("ID"); + + b.Property("ApproximateCost") + .HasColumnType("real"); + + b.Property("Duration") + .HasMaxLength(255) + .HasColumnType("nvarchar(255)"); + + b.Property("DurationInSeconds") + .HasColumnType("float"); + + b.Property("Locale") + .HasMaxLength(255) + .HasColumnType("nvarchar(255)"); + + b.Property("Name") + .HasMaxLength(500) + .HasColumnType("nvarchar(500)"); + + b.Property("NumberOfChannels") + .HasColumnType("int"); + + b.Property("Source") + .HasMaxLength(500) + .HasColumnType("nvarchar(500)"); + + b.Property("Timestamp") + .HasColumnType("datetime2"); + + b.HasKey("Id"); + + b.ToTable("Transcriptions"); + }); + + modelBuilder.Entity("Connector.Database.Models.Word", b => + { + b.Property("Id") + .ValueGeneratedOnAdd() + .HasColumnType("uniqueidentifier") + .HasColumnName("ID"); + + b.Property("Confidence") + .HasColumnType("float"); + + b.Property("Duration") + .HasMaxLength(255) + .HasColumnType("nvarchar(255)"); + + b.Property("NBestID") + .HasColumnType("uniqueidentifier"); + + b.Property("Offset") + .HasMaxLength(255) + .HasColumnType("nvarchar(255)"); + + b.Property("WordText") + .HasMaxLength(511) + .HasColumnType("nvarchar(511)") + .HasColumnName("Word"); + + b.HasKey("Id"); + + b.HasIndex("NBestID"); + + b.ToTable("Words"); + }); + + modelBuilder.Entity("Connector.Database.Models.CombinedRecognizedPhrase", b => + { + b.HasOne("Connector.Database.Models.Transcription", null) + .WithMany("CombinedRecognizedPhrases") + .HasForeignKey("TranscriptionID"); + }); + + modelBuilder.Entity("Connector.Database.Models.NBest", b => + { + b.HasOne("Connector.Database.Models.RecognizedPhrase", null) + .WithMany("NBests") + .HasForeignKey("RecognizedPhraseID"); + }); + + modelBuilder.Entity("Connector.Database.Models.RecognizedPhrase", b => + { + b.HasOne("Connector.Database.Models.CombinedRecognizedPhrase", null) + .WithMany("RecognizedPhrases") + .HasForeignKey("CombinedRecognizedPhraseID"); + }); + + modelBuilder.Entity("Connector.Database.Models.Word", b => + { + b.HasOne("Connector.Database.Models.NBest", null) + .WithMany("Words") + .HasForeignKey("NBestID"); + }); + + modelBuilder.Entity("Connector.Database.Models.CombinedRecognizedPhrase", b => + { + b.Navigation("RecognizedPhrases"); + }); + + modelBuilder.Entity("Connector.Database.Models.NBest", b => + { + b.Navigation("Words"); + }); + + modelBuilder.Entity("Connector.Database.Models.RecognizedPhrase", b => + { + b.Navigation("NBests"); + }); + + modelBuilder.Entity("Connector.Database.Models.Transcription", b => + { + b.Navigation("CombinedRecognizedPhrases"); + }); +#pragma warning restore 612, 618 + } + } +} diff --git a/samples/ingestion/ingestion-client/Connector/Migrations/20221107110715_Init.cs b/samples/ingestion/ingestion-client/Connector/Migrations/20221107110715_Init.cs new file mode 100644 index 000000000..014ecdf20 --- /dev/null +++ b/samples/ingestion/ingestion-client/Connector/Migrations/20221107110715_Init.cs @@ -0,0 +1,173 @@ +// +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT license. See LICENSE.md file in the project root for full license information. +// + +#nullable disable + +namespace Connector.Migrations +{ + using System; + using Microsoft.EntityFrameworkCore.Migrations; + + public partial class Init : Migration + { + protected override void Up(MigrationBuilder migrationBuilder) + { + _ = migrationBuilder ?? throw new ArgumentNullException(nameof(migrationBuilder)); + + migrationBuilder.CreateTable( + name: "Transcriptions", + columns: table => new + { + ID = table.Column(type: "uniqueidentifier", nullable: false), + Locale = table.Column(type: "nvarchar(255)", maxLength: 255, nullable: true), + Name = table.Column(type: "nvarchar(500)", maxLength: 500, nullable: true), + Source = table.Column(type: "nvarchar(500)", maxLength: 500, nullable: true), + Timestamp = table.Column(type: "datetime2", nullable: false), + Duration = table.Column(type: "nvarchar(255)", maxLength: 255, nullable: true), + DurationInSeconds = table.Column(type: "float", nullable: false), + NumberOfChannels = table.Column(type: "int", nullable: false), + ApproximateCost = table.Column(type: "real", nullable: false) + }, + constraints: table => + { + table.PrimaryKey("PK_Transcriptions", x => x.ID); + }); + + migrationBuilder.CreateTable( + name: "CombinedRecognizedPhrases", + columns: table => new + { + ID = table.Column(type: "uniqueidentifier", nullable: false), + Channel = table.Column(type: "int", nullable: false), + Lexical = table.Column(type: "nvarchar(max)", nullable: true), + Itn = table.Column(type: "nvarchar(max)", nullable: true), + MaskedItn = table.Column(type: "nvarchar(max)", nullable: true), + Display = table.Column(type: "nvarchar(max)", nullable: true), + SentimentNegative = table.Column(type: "float", nullable: false), + SentimentNeutral = table.Column(type: "float", nullable: false), + SentimentPositive = table.Column(type: "float", nullable: false), + TranscriptionID = table.Column(type: "uniqueidentifier", nullable: true) + }, + constraints: table => + { + table.PrimaryKey("PK_CombinedRecognizedPhrases", x => x.ID); + table.ForeignKey( + name: "FK_CombinedRecognizedPhrases_Transcriptions_TranscriptionID", + column: x => x.TranscriptionID, + principalTable: "Transcriptions", + principalColumn: "ID"); + }); + + migrationBuilder.CreateTable( + name: "RecognizedPhrases", + columns: table => new + { + ID = table.Column(type: "uniqueidentifier", nullable: false), + RecognitionStatus = table.Column(type: "nvarchar(32)", maxLength: 32, nullable: true), + Speaker = table.Column(type: "int", nullable: false), + Channel = table.Column(type: "int", nullable: false), + Offset = table.Column(type: "nvarchar(255)", maxLength: 255, nullable: true), + Duration = table.Column(type: "nvarchar(255)", maxLength: 255, nullable: true), + SilenceBetweenCurrentAndPreviousSegmentInMs = table.Column(type: "int", nullable: false), + CombinedRecognizedPhraseID = table.Column(type: "uniqueidentifier", nullable: true) + }, + constraints: table => + { + table.PrimaryKey("PK_RecognizedPhrases", x => x.ID); + table.ForeignKey( + name: "FK_RecognizedPhrases_CombinedRecognizedPhrases_CombinedRecognizedPhraseID", + column: x => x.CombinedRecognizedPhraseID, + principalTable: "CombinedRecognizedPhrases", + principalColumn: "ID"); + }); + + migrationBuilder.CreateTable( + name: "NBests", + columns: table => new + { + ID = table.Column(type: "uniqueidentifier", nullable: false), + Confidence = table.Column(type: "float", nullable: false), + Lexical = table.Column(type: "nvarchar(max)", nullable: true), + Itn = table.Column(type: "nvarchar(max)", nullable: true), + MaskedItn = table.Column(type: "nvarchar(max)", nullable: true), + Display = table.Column(type: "nvarchar(max)", nullable: true), + SentimentNegative = table.Column(type: "float", nullable: false), + SentimentNeutral = table.Column(type: "float", nullable: false), + SentimentPositive = table.Column(type: "float", nullable: false), + RecognizedPhraseID = table.Column(type: "uniqueidentifier", nullable: true) + }, + constraints: table => + { + table.PrimaryKey("PK_NBests", x => x.ID); + table.ForeignKey( + name: "FK_NBests_RecognizedPhrases_RecognizedPhraseID", + column: x => x.RecognizedPhraseID, + principalTable: "RecognizedPhrases", + principalColumn: "ID"); + }); + + migrationBuilder.CreateTable( + name: "Words", + columns: table => new + { + ID = table.Column(type: "uniqueidentifier", nullable: false), + Word = table.Column(type: "nvarchar(511)", maxLength: 511, nullable: true), + Offset = table.Column(type: "nvarchar(255)", maxLength: 255, nullable: true), + Duration = table.Column(type: "nvarchar(255)", maxLength: 255, nullable: true), + Confidence = table.Column(type: "float", nullable: false), + NBestID = table.Column(type: "uniqueidentifier", nullable: true) + }, + constraints: table => + { + table.PrimaryKey("PK_Words", x => x.ID); + table.ForeignKey( + name: "FK_Words_NBests_NBestID", + column: x => x.NBestID, + principalTable: "NBests", + principalColumn: "ID"); + }); + + migrationBuilder.CreateIndex( + name: "IX_CombinedRecognizedPhrases_TranscriptionID", + table: "CombinedRecognizedPhrases", + column: "TranscriptionID"); + + migrationBuilder.CreateIndex( + name: "IX_NBests_RecognizedPhraseID", + table: "NBests", + column: "RecognizedPhraseID"); + + migrationBuilder.CreateIndex( + name: "IX_RecognizedPhrases_CombinedRecognizedPhraseID", + table: "RecognizedPhrases", + column: "CombinedRecognizedPhraseID"); + + migrationBuilder.CreateIndex( + name: "IX_Words_NBestID", + table: "Words", + column: "NBestID"); + } + + protected override void Down(MigrationBuilder migrationBuilder) + { + _ = migrationBuilder ?? throw new ArgumentNullException(nameof(migrationBuilder)); + + migrationBuilder.DropTable( + name: "Words"); + + migrationBuilder.DropTable( + name: "NBests"); + + migrationBuilder.DropTable( + name: "RecognizedPhrases"); + + migrationBuilder.DropTable( + name: "CombinedRecognizedPhrases"); + + migrationBuilder.DropTable( + name: "Transcriptions"); + } + } +} diff --git a/samples/ingestion/ingestion-client/Connector/Migrations/IngestionClientDbContextModelSnapshot.cs b/samples/ingestion/ingestion-client/Connector/Migrations/IngestionClientDbContextModelSnapshot.cs new file mode 100644 index 000000000..8b83454ef --- /dev/null +++ b/samples/ingestion/ingestion-client/Connector/Migrations/IngestionClientDbContextModelSnapshot.cs @@ -0,0 +1,268 @@ +// +using System; +using Connector.Database; +using Microsoft.EntityFrameworkCore; +using Microsoft.EntityFrameworkCore.Infrastructure; +using Microsoft.EntityFrameworkCore.Metadata; +using Microsoft.EntityFrameworkCore.Storage.ValueConversion; + +#nullable disable + +namespace Connector.Migrations +{ + [DbContext(typeof(IngestionClientDbContext))] + partial class IngestionClientDbContextModelSnapshot : ModelSnapshot + { + protected override void BuildModel(ModelBuilder modelBuilder) + { +#pragma warning disable 612, 618 + modelBuilder + .HasAnnotation("ProductVersion", "6.0.10") + .HasAnnotation("Relational:MaxIdentifierLength", 128); + + SqlServerModelBuilderExtensions.UseIdentityColumns(modelBuilder, 1L, 1); + + modelBuilder.Entity("Connector.Database.Models.CombinedRecognizedPhrase", b => + { + b.Property("Id") + .ValueGeneratedOnAdd() + .HasColumnType("uniqueidentifier") + .HasColumnName("ID"); + + b.Property("Channel") + .HasColumnType("int"); + + b.Property("Display") + .HasColumnType("nvarchar(max)"); + + b.Property("Itn") + .HasColumnType("nvarchar(max)"); + + b.Property("Lexical") + .HasColumnType("nvarchar(max)"); + + b.Property("MaskedItn") + .HasColumnType("nvarchar(max)"); + + b.Property("SentimentNegative") + .HasColumnType("float"); + + b.Property("SentimentNeutral") + .HasColumnType("float"); + + b.Property("SentimentPositive") + .HasColumnType("float"); + + b.Property("TranscriptionID") + .HasColumnType("uniqueidentifier"); + + b.HasKey("Id"); + + b.HasIndex("TranscriptionID"); + + b.ToTable("CombinedRecognizedPhrases"); + }); + + modelBuilder.Entity("Connector.Database.Models.NBest", b => + { + b.Property("Id") + .ValueGeneratedOnAdd() + .HasColumnType("uniqueidentifier") + .HasColumnName("ID"); + + b.Property("Confidence") + .HasColumnType("float"); + + b.Property("Display") + .HasColumnType("nvarchar(max)"); + + b.Property("Itn") + .HasColumnType("nvarchar(max)"); + + b.Property("Lexical") + .HasColumnType("nvarchar(max)"); + + b.Property("MaskedItn") + .HasColumnType("nvarchar(max)"); + + b.Property("RecognizedPhraseID") + .HasColumnType("uniqueidentifier"); + + b.Property("SentimentNegative") + .HasColumnType("float"); + + b.Property("SentimentNeutral") + .HasColumnType("float"); + + b.Property("SentimentPositive") + .HasColumnType("float"); + + b.HasKey("Id"); + + b.HasIndex("RecognizedPhraseID"); + + b.ToTable("NBests"); + }); + + modelBuilder.Entity("Connector.Database.Models.RecognizedPhrase", b => + { + b.Property("Id") + .ValueGeneratedOnAdd() + .HasColumnType("uniqueidentifier") + .HasColumnName("ID"); + + b.Property("Channel") + .HasColumnType("int"); + + b.Property("CombinedRecognizedPhraseID") + .HasColumnType("uniqueidentifier"); + + b.Property("Duration") + .HasMaxLength(255) + .HasColumnType("nvarchar(255)"); + + b.Property("Offset") + .HasMaxLength(255) + .HasColumnType("nvarchar(255)"); + + b.Property("RecognitionStatus") + .HasMaxLength(32) + .HasColumnType("nvarchar(32)"); + + b.Property("SilenceBetweenCurrentAndPreviousSegmentInMs") + .HasColumnType("int"); + + b.Property("Speaker") + .HasColumnType("int"); + + b.HasKey("Id"); + + b.HasIndex("CombinedRecognizedPhraseID"); + + b.ToTable("RecognizedPhrases"); + }); + + modelBuilder.Entity("Connector.Database.Models.Transcription", b => + { + b.Property("Id") + .ValueGeneratedOnAdd() + .HasColumnType("uniqueidentifier") + .HasColumnName("ID"); + + b.Property("ApproximateCost") + .HasColumnType("real"); + + b.Property("Duration") + .HasMaxLength(255) + .HasColumnType("nvarchar(255)"); + + b.Property("DurationInSeconds") + .HasColumnType("float"); + + b.Property("Locale") + .HasMaxLength(255) + .HasColumnType("nvarchar(255)"); + + b.Property("Name") + .HasMaxLength(500) + .HasColumnType("nvarchar(500)"); + + b.Property("NumberOfChannels") + .HasColumnType("int"); + + b.Property("Source") + .HasMaxLength(500) + .HasColumnType("nvarchar(500)"); + + b.Property("Timestamp") + .HasColumnType("datetime2"); + + b.HasKey("Id"); + + b.ToTable("Transcriptions"); + }); + + modelBuilder.Entity("Connector.Database.Models.Word", b => + { + b.Property("Id") + .ValueGeneratedOnAdd() + .HasColumnType("uniqueidentifier") + .HasColumnName("ID"); + + b.Property("Confidence") + .HasColumnType("float"); + + b.Property("Duration") + .HasMaxLength(255) + .HasColumnType("nvarchar(255)"); + + b.Property("NBestID") + .HasColumnType("uniqueidentifier"); + + b.Property("Offset") + .HasMaxLength(255) + .HasColumnType("nvarchar(255)"); + + b.Property("WordText") + .HasMaxLength(511) + .HasColumnType("nvarchar(511)") + .HasColumnName("Word"); + + b.HasKey("Id"); + + b.HasIndex("NBestID"); + + b.ToTable("Words"); + }); + + modelBuilder.Entity("Connector.Database.Models.CombinedRecognizedPhrase", b => + { + b.HasOne("Connector.Database.Models.Transcription", null) + .WithMany("CombinedRecognizedPhrases") + .HasForeignKey("TranscriptionID"); + }); + + modelBuilder.Entity("Connector.Database.Models.NBest", b => + { + b.HasOne("Connector.Database.Models.RecognizedPhrase", null) + .WithMany("NBests") + .HasForeignKey("RecognizedPhraseID"); + }); + + modelBuilder.Entity("Connector.Database.Models.RecognizedPhrase", b => + { + b.HasOne("Connector.Database.Models.CombinedRecognizedPhrase", null) + .WithMany("RecognizedPhrases") + .HasForeignKey("CombinedRecognizedPhraseID"); + }); + + modelBuilder.Entity("Connector.Database.Models.Word", b => + { + b.HasOne("Connector.Database.Models.NBest", null) + .WithMany("Words") + .HasForeignKey("NBestID"); + }); + + modelBuilder.Entity("Connector.Database.Models.CombinedRecognizedPhrase", b => + { + b.Navigation("RecognizedPhrases"); + }); + + modelBuilder.Entity("Connector.Database.Models.NBest", b => + { + b.Navigation("Words"); + }); + + modelBuilder.Entity("Connector.Database.Models.RecognizedPhrase", b => + { + b.Navigation("NBests"); + }); + + modelBuilder.Entity("Connector.Database.Models.Transcription", b => + { + b.Navigation("CombinedRecognizedPhrases"); + }); +#pragma warning restore 612, 618 + } + } +} diff --git a/samples/ingestion/ingestion-client/DatabaseMigrator/DatabaseMigrator.csproj b/samples/ingestion/ingestion-client/DatabaseMigrator/DatabaseMigrator.csproj new file mode 100644 index 000000000..a0bfdfc5e --- /dev/null +++ b/samples/ingestion/ingestion-client/DatabaseMigrator/DatabaseMigrator.csproj @@ -0,0 +1,17 @@ + + + + Exe + net6.0 + enable + enable + + + + + + + + + + diff --git a/samples/ingestion/ingestion-client/DatabaseMigrator/DesignTimeSpeechServicesDbContextFactory.cs b/samples/ingestion/ingestion-client/DatabaseMigrator/DesignTimeSpeechServicesDbContextFactory.cs new file mode 100644 index 000000000..33adda329 --- /dev/null +++ b/samples/ingestion/ingestion-client/DatabaseMigrator/DesignTimeSpeechServicesDbContextFactory.cs @@ -0,0 +1,25 @@ +// +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT license. See LICENSE.md file in the project root for full license information. +// + +namespace DatabaseMigrator +{ + using Connector.Database; + + using Microsoft.EntityFrameworkCore; + using Microsoft.EntityFrameworkCore.Design; + + public class DesignTimeSpeechServicesDbContextFactory : IDesignTimeDbContextFactory + { + public IngestionClientDbContext CreateDbContext(string[] args) + { + var optionsBuilder = new DbContextOptionsBuilder(); + optionsBuilder.UseSqlServer("Server=(localdb)\\mssqllocaldb;Database=DesignTimeDb;Trusted_Connection=True;MultipleActiveResultSets=true"); + var options = optionsBuilder.Options; + options.Freeze(); + var context = new IngestionClientDbContext(optionsBuilder.Options); + return context; + } + } +} \ No newline at end of file diff --git a/samples/ingestion/ingestion-client/DatabaseMigrator/Program.cs b/samples/ingestion/ingestion-client/DatabaseMigrator/Program.cs new file mode 100644 index 000000000..0fcc2ab4d --- /dev/null +++ b/samples/ingestion/ingestion-client/DatabaseMigrator/Program.cs @@ -0,0 +1,20 @@ +// +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT license. See LICENSE.md file in the project root for full license information. +// + +namespace DatabaseMigrator +{ + public static class Program + { + /// + /// Used for creating a database migration locally. + /// Run + /// dotnet ef migrations add Init --project Connector --startup-project DatabaseMigrator + /// to add entity framework database migration + /// + public static void Main() + { + } + } +} diff --git a/samples/ingestion/ingestion-client/FetchTranscription/Database/DatabaseConfigProvider.cs b/samples/ingestion/ingestion-client/FetchTranscription/Database/DatabaseConfigProvider.cs new file mode 100644 index 000000000..db751083d --- /dev/null +++ b/samples/ingestion/ingestion-client/FetchTranscription/Database/DatabaseConfigProvider.cs @@ -0,0 +1,38 @@ +// +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT license. See LICENSE.md file in the project root for full license information. +// + +using FetchTranscription.Database; + +using Microsoft.Azure.WebJobs.Hosting; + +[assembly: WebJobsStartup(typeof(DatabaseInitializationService), "DatabaseInitialize")] + +namespace FetchTranscription.Database +{ + using Connector.Database; + + using Microsoft.Azure.WebJobs.Description; + using Microsoft.Azure.WebJobs.Host.Config; + using Microsoft.EntityFrameworkCore; + using Microsoft.Extensions.DependencyInjection; + + [Extension("DatabaseConfig")] + public class DatabaseConfigProvider : IExtensionConfigProvider + { + private readonly IServiceScopeFactory scopeFactory; + + public DatabaseConfigProvider(IServiceScopeFactory scopeFactory) + { + this.scopeFactory = scopeFactory; + } + + public void Initialize(ExtensionConfigContext context) + { + using var scope = this.scopeFactory.CreateScope(); + var databaseContext = scope.ServiceProvider.GetService(); + databaseContext.Database.Migrate(); + } + } +} diff --git a/samples/ingestion/ingestion-client/FetchTranscription/Database/DatabaseInitializationService.cs b/samples/ingestion/ingestion-client/FetchTranscription/Database/DatabaseInitializationService.cs new file mode 100644 index 000000000..26fa430f4 --- /dev/null +++ b/samples/ingestion/ingestion-client/FetchTranscription/Database/DatabaseInitializationService.cs @@ -0,0 +1,22 @@ +// +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT license. See LICENSE.md file in the project root for full license information. +// + +using FetchTranscription.Database; + +using Microsoft.Azure.WebJobs; +using Microsoft.Azure.WebJobs.Hosting; + +[assembly: WebJobsStartup(typeof(DatabaseInitializationService), "DatabaseInitialize")] + +namespace FetchTranscription.Database +{ + public class DatabaseInitializationService : IWebJobsStartup + { + public void Configure(IWebJobsBuilder builder) + { + builder.AddExtension(); + } + } +} diff --git a/samples/ingestion/ingestion-client/FetchTranscription/FetchTranscription.cs b/samples/ingestion/ingestion-client/FetchTranscription/FetchTranscription.cs index 41de4a790..d27673ce3 100644 --- a/samples/ingestion/ingestion-client/FetchTranscription/FetchTranscription.cs +++ b/samples/ingestion/ingestion-client/FetchTranscription/FetchTranscription.cs @@ -8,13 +8,21 @@ namespace FetchTranscriptionFunction using System; using System.Threading.Tasks; using Connector; + using Microsoft.Azure.WebJobs; using Microsoft.Extensions.Logging; - public static class FetchTranscription + public class FetchTranscription { + private readonly IServiceProvider serviceProvider; + + public FetchTranscription(IServiceProvider serviceProvider) + { + this.serviceProvider = serviceProvider; + } + [FunctionName("FetchTranscription")] - public static async Task Run([ServiceBusTrigger("fetch_transcription_queue", Connection = "AzureServiceBus")]string message, ILogger log) + public async Task Run([ServiceBusTrigger("fetch_transcription_queue", Connection = "AzureServiceBus")]string message, ILogger log) { if (log == null) { @@ -30,7 +38,10 @@ public static async Task Run([ServiceBusTrigger("fetch_transcription_queue", Con } var serviceBusMessage = TranscriptionStartedMessage.DeserializeMessage(message); - await TranscriptionProcessor.ProcessTranscriptionJobAsync(serviceBusMessage, log).ConfigureAwait(false); + + var transcriptionProcessor = new TranscriptionProcessor(this.serviceProvider); + + await transcriptionProcessor.ProcessTranscriptionJobAsync(serviceBusMessage, this.serviceProvider, log).ConfigureAwait(false); } } } diff --git a/samples/ingestion/ingestion-client/FetchTranscription/FetchTranscription.csproj b/samples/ingestion/ingestion-client/FetchTranscription/FetchTranscription.csproj index fd89c55c1..d6936db77 100644 --- a/samples/ingestion/ingestion-client/FetchTranscription/FetchTranscription.csproj +++ b/samples/ingestion/ingestion-client/FetchTranscription/FetchTranscription.csproj @@ -6,9 +6,8 @@ + - - diff --git a/samples/ingestion/ingestion-client/FetchTranscription/Startup.cs b/samples/ingestion/ingestion-client/FetchTranscription/Startup.cs new file mode 100644 index 000000000..a5d1463e3 --- /dev/null +++ b/samples/ingestion/ingestion-client/FetchTranscription/Startup.cs @@ -0,0 +1,35 @@ +// +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT license. See LICENSE.md file in the project root for full license information. +// + +using Microsoft.Azure.Functions.Extensions.DependencyInjection; + +[assembly: FunctionsStartup(typeof(FetchTranscription.Startup))] + +namespace FetchTranscription +{ + using System; + + using Connector.Database; + + using FetchTranscriptionFunction; + + using Microsoft.Azure.Functions.Extensions.DependencyInjection; + using Microsoft.EntityFrameworkCore; + using Microsoft.Extensions.DependencyInjection; + + public class Startup : FunctionsStartup + { + public override void Configure(IFunctionsHostBuilder builder) + { + _ = builder ?? throw new ArgumentNullException(nameof(builder)); + + if (FetchTranscriptionEnvironmentVariables.UseSqlDatabase) + { + builder.Services.AddDbContext( + options => SqlServerDbContextOptionsExtensions.UseSqlServer(options, FetchTranscriptionEnvironmentVariables.DatabaseConnectionString)); + } + } + } +} diff --git a/samples/ingestion/ingestion-client/FetchTranscription/TranscriptionProcessor.cs b/samples/ingestion/ingestion-client/FetchTranscription/TranscriptionProcessor.cs index 61f0f4e83..657fc9c50 100644 --- a/samples/ingestion/ingestion-client/FetchTranscription/TranscriptionProcessor.cs +++ b/samples/ingestion/ingestion-client/FetchTranscription/TranscriptionProcessor.cs @@ -15,17 +15,20 @@ namespace FetchTranscriptionFunction using Azure; using Azure.Messaging.ServiceBus; using Connector; + using Connector.Database; using Connector.Enums; using Connector.Serializable.TranscriptionStartedServiceBusMessage; using Language; + using Microsoft.EntityFrameworkCore; + using Microsoft.Extensions.DependencyInjection; using Microsoft.Extensions.Logging; using Newtonsoft.Json; using TextAnalytics; using static Connector.Serializable.TranscriptionStartedServiceBusMessage.TextAnalyticsRequest; - public static class TranscriptionProcessor + public class TranscriptionProcessor { private static readonly StorageConnector StorageConnectorInstance = new StorageConnector(FetchTranscriptionEnvironmentVariables.AzureWebJobsStorage); @@ -37,8 +40,22 @@ public static class TranscriptionProcessor private static readonly ServiceBusSender FetchServiceBusSender = FetchServiceBusClient.CreateSender(ServiceBusConnectionStringProperties.Parse(FetchTranscriptionEnvironmentVariables.FetchTranscriptionServiceBusConnectionString).EntityPath); + private readonly IServiceProvider serviceProvider; + + private readonly IngestionClientDbContext databaseContext; + + public TranscriptionProcessor(IServiceProvider serviceProvider) + { + this.serviceProvider = serviceProvider; + + if (FetchTranscriptionEnvironmentVariables.UseSqlDatabase) + { + this.databaseContext = this.serviceProvider.GetRequiredService(); + } + } + [System.Diagnostics.CodeAnalysis.SuppressMessage("Design", "CA1031:Do not catch general exception types", Justification = "Catch general exception to allow manual retrying.")] - public static async Task ProcessTranscriptionJobAsync(TranscriptionStartedMessage serviceBusMessage, ILogger log) + public async Task ProcessTranscriptionJobAsync(TranscriptionStartedMessage serviceBusMessage, IServiceProvider serviceProvider, ILogger log) { if (serviceBusMessage == null) { @@ -63,7 +80,7 @@ public static async Task ProcessTranscriptionJobAsync(TranscriptionStartedMessag await ProcessFailedTranscriptionAsync(transcriptionLocation, subscriptionKey, serviceBusMessage, transcription, jobName, log).ConfigureAwait(false); break; case "Succeeded": - await ProcessSucceededTranscriptionAsync(transcriptionLocation, subscriptionKey, serviceBusMessage, jobName, log).ConfigureAwait(false); + await this.ProcessSucceededTranscriptionAsync(transcriptionLocation, subscriptionKey, serviceBusMessage, jobName, log).ConfigureAwait(false); break; case "Running": log.LogInformation($"Transcription running, polling again after {messageDelayTime.TotalMinutes} minutes."); @@ -201,7 +218,102 @@ await StorageConnectorInstance.MoveFileAsync( await BatchClient.DeleteTranscriptionAsync(transcriptionLocation, subscriptionKey).ConfigureAwait(false); } - private static async Task ProcessSucceededTranscriptionAsync(string transcriptionLocation, string subscriptionKey, TranscriptionStartedMessage serviceBusMessage, string jobName, ILogger log) + private static bool IsRetryableError(string errorCode) + { + return errorCode switch + { + "InvalidUri" or "Internal" or "Timeout" or "Transient" => true, + _ => false, + }; + } + + private static async Task ProcessReportFileAsync(TranscriptionReportFile transcriptionReportFile, ILogger log) + { + var failedTranscriptions = transcriptionReportFile.Details. + Where(detail => !string.IsNullOrEmpty(detail.Status) && + detail.Status.Equals("Failed", StringComparison.OrdinalIgnoreCase) && + !string.IsNullOrEmpty(detail.Source)); + + foreach (var failedTranscription in failedTranscriptions) + { + if (string.IsNullOrEmpty(failedTranscription.Source)) + { + continue; + } + + var safeErrorCode = failedTranscription.ErrorKind ?? "unknown"; + var safeErrorMessage = failedTranscription.ErrorMessage ?? "unknown"; + + var fileName = StorageConnector.GetFileNameFromUri(new Uri(failedTranscription.Source)); + + var message = $"Transcription \"{fileName}\" failed with error \"{safeErrorCode}\" and message \"{safeErrorMessage}\""; + log.LogError(message); + + var errorTxtname = fileName + ".txt"; + await StorageConnectorInstance.WriteTextFileToBlobAsync( + message, + FetchTranscriptionEnvironmentVariables.ErrorReportOutputContainer, + errorTxtname, + log).ConfigureAwait(false); + await StorageConnectorInstance.MoveFileAsync( + FetchTranscriptionEnvironmentVariables.AudioInputContainer, + fileName, + FetchTranscriptionEnvironmentVariables.ErrorFilesOutputContainer, + fileName, + false, + log).ConfigureAwait(false); + } + } + + private static async Task RetryOrFailJobAsync(TranscriptionStartedMessage message, string errorMessage, string jobName, string transcriptionLocation, string subscriptionKey, ILogger log, bool isThrottled) + { + log.LogError(errorMessage); + message.FailedExecutionCounter += 1; + var messageDelayTime = GetMessageDelayTime(message.PollingCounter); + + if (message.FailedExecutionCounter <= FetchTranscriptionEnvironmentVariables.RetryLimit || isThrottled) + { + log.LogInformation("Retrying.."); + await ServiceBusUtilities.SendServiceBusMessageAsync(FetchServiceBusSender, message.CreateMessageString(), log, messageDelayTime).ConfigureAwait(false); + } + else + { + await WriteFailedJobLogToStorageAsync(message, errorMessage, jobName, log).ConfigureAwait(false); + await BatchClient.DeleteTranscriptionAsync(transcriptionLocation, subscriptionKey).ConfigureAwait(false); + } + } + + private static async Task WriteFailedJobLogToStorageAsync(TranscriptionStartedMessage transcriptionStartedMessage, string errorMessage, string jobName, ILogger log) + { + log.LogError(errorMessage); + var errorOutputContainer = FetchTranscriptionEnvironmentVariables.ErrorReportOutputContainer; + + var jobErrorFileName = $"jobs/{jobName}.txt"; + await StorageConnectorInstance.WriteTextFileToBlobAsync(errorMessage, errorOutputContainer, jobErrorFileName, log).ConfigureAwait(false); + + foreach (var audioFileInfo in transcriptionStartedMessage.AudioFileInfos) + { + var fileName = StorageConnector.GetFileNameFromUri(new Uri(audioFileInfo.FileUrl)); + var errorFileName = fileName + ".txt"; + try + { + await StorageConnectorInstance.WriteTextFileToBlobAsync(errorMessage, errorOutputContainer, errorFileName, log).ConfigureAwait(false); + await StorageConnectorInstance.MoveFileAsync( + FetchTranscriptionEnvironmentVariables.AudioInputContainer, + fileName, + FetchTranscriptionEnvironmentVariables.ErrorFilesOutputContainer, + fileName, + false, + log).ConfigureAwait(false); + } + catch (RequestFailedException e) + { + log.LogError($"Storage Exception {e} while writing error log to file and moving result"); + } + } + } + + private async Task ProcessSucceededTranscriptionAsync(string transcriptionLocation, string subscriptionKey, TranscriptionStartedMessage serviceBusMessage, string jobName, ILogger log) { log.LogInformation($"Got succeeded transcription for job {jobName}"); @@ -430,14 +542,22 @@ private static async Task ProcessSucceededTranscriptionAsync(string transcriptio var containsMultipleTranscriptions = resultFiles.Skip(1).Any(); var jobId = containsMultipleTranscriptions ? Guid.NewGuid() : new Guid(transcriptionLocation.Split('/').LastOrDefault()); - var databaseConnectionString = FetchTranscriptionEnvironmentVariables.DatabaseConnectionString; - using var databaseConnector = new DatabaseConnector(log, databaseConnectionString); - await databaseConnector.StoreTranscriptionAsync( - jobId, - serviceBusMessage.Locale, - string.IsNullOrEmpty(fileName) ? jobName : fileName, - (float)approximatedCost, - speechTranscript).ConfigureAwait(false); + + try + { + await this.databaseContext.StoreTranscriptionAsync( + jobId, + serviceBusMessage.Locale, + string.IsNullOrEmpty(fileName) ? jobName : fileName, + (float)approximatedCost, + speechTranscript).ConfigureAwait(false); + } + catch (Exception exception) when (exception is DbUpdateException || exception is DbUpdateConcurrencyException) + { + var errorMessage = $"Exception while processing database update: {exception}"; + log.LogError(errorMessage); + generalErrorsStringBuilder.AppendLine(errorMessage); + } } if (FetchTranscriptionEnvironmentVariables.CreateAudioProcessedContainer) @@ -464,100 +584,5 @@ await StorageConnectorInstance.WriteTextFileToBlobAsync( BatchClient.DeleteTranscriptionAsync(transcriptionLocation, subscriptionKey).ConfigureAwait(false).GetAwaiter().GetResult(); } - - private static bool IsRetryableError(string errorCode) - { - return errorCode switch - { - "InvalidUri" or "Internal" or "Timeout" or "Transient" => true, - _ => false, - }; - } - - private static async Task ProcessReportFileAsync(TranscriptionReportFile transcriptionReportFile, ILogger log) - { - var failedTranscriptions = transcriptionReportFile.Details. - Where(detail => !string.IsNullOrEmpty(detail.Status) && - detail.Status.Equals("Failed", StringComparison.OrdinalIgnoreCase) && - !string.IsNullOrEmpty(detail.Source)); - - foreach (var failedTranscription in failedTranscriptions) - { - if (string.IsNullOrEmpty(failedTranscription.Source)) - { - continue; - } - - var safeErrorCode = failedTranscription.ErrorKind ?? "unknown"; - var safeErrorMessage = failedTranscription.ErrorMessage ?? "unknown"; - - var fileName = StorageConnector.GetFileNameFromUri(new Uri(failedTranscription.Source)); - - var message = $"Transcription \"{fileName}\" failed with error \"{safeErrorCode}\" and message \"{safeErrorMessage}\""; - log.LogError(message); - - var errorTxtname = fileName + ".txt"; - await StorageConnectorInstance.WriteTextFileToBlobAsync( - message, - FetchTranscriptionEnvironmentVariables.ErrorReportOutputContainer, - errorTxtname, - log).ConfigureAwait(false); - await StorageConnectorInstance.MoveFileAsync( - FetchTranscriptionEnvironmentVariables.AudioInputContainer, - fileName, - FetchTranscriptionEnvironmentVariables.ErrorFilesOutputContainer, - fileName, - false, - log).ConfigureAwait(false); - } - } - - private static async Task RetryOrFailJobAsync(TranscriptionStartedMessage message, string errorMessage, string jobName, string transcriptionLocation, string subscriptionKey, ILogger log, bool isThrottled) - { - log.LogError(errorMessage); - message.FailedExecutionCounter += 1; - var messageDelayTime = GetMessageDelayTime(message.PollingCounter); - - if (message.FailedExecutionCounter <= FetchTranscriptionEnvironmentVariables.RetryLimit || isThrottled) - { - log.LogInformation("Retrying.."); - await ServiceBusUtilities.SendServiceBusMessageAsync(FetchServiceBusSender, message.CreateMessageString(), log, messageDelayTime).ConfigureAwait(false); - } - else - { - await WriteFailedJobLogToStorageAsync(message, errorMessage, jobName, log).ConfigureAwait(false); - await BatchClient.DeleteTranscriptionAsync(transcriptionLocation, subscriptionKey).ConfigureAwait(false); - } - } - - private static async Task WriteFailedJobLogToStorageAsync(TranscriptionStartedMessage transcriptionStartedMessage, string errorMessage, string jobName, ILogger log) - { - log.LogError(errorMessage); - var errorOutputContainer = FetchTranscriptionEnvironmentVariables.ErrorReportOutputContainer; - - var jobErrorFileName = $"jobs/{jobName}.txt"; - await StorageConnectorInstance.WriteTextFileToBlobAsync(errorMessage, errorOutputContainer, jobErrorFileName, log).ConfigureAwait(false); - - foreach (var audioFileInfo in transcriptionStartedMessage.AudioFileInfos) - { - var fileName = StorageConnector.GetFileNameFromUri(new Uri(audioFileInfo.FileUrl)); - var errorFileName = fileName + ".txt"; - try - { - await StorageConnectorInstance.WriteTextFileToBlobAsync(errorMessage, errorOutputContainer, errorFileName, log).ConfigureAwait(false); - await StorageConnectorInstance.MoveFileAsync( - FetchTranscriptionEnvironmentVariables.AudioInputContainer, - fileName, - FetchTranscriptionEnvironmentVariables.ErrorFilesOutputContainer, - fileName, - false, - log).ConfigureAwait(false); - } - catch (RequestFailedException e) - { - log.LogError($"Storage Exception {e} while writing error log to file and moving result"); - } - } - } } } diff --git a/samples/ingestion/ingestion-client/Setup/ArmTemplateBatch.json b/samples/ingestion/ingestion-client/Setup/ArmTemplateBatch.json index cda8d7643..f54de0bbd 100644 --- a/samples/ingestion/ingestion-client/Setup/ArmTemplateBatch.json +++ b/samples/ingestion/ingestion-client/Setup/ArmTemplateBatch.json @@ -227,7 +227,7 @@ } }, "variables": { - "Version": "v1.0.1", + "Version": "v2.0.0", "AudioInputContainer": "audio-input", "AudioProcessedContainer": "audio-processed", "ErrorFilesOutputContainer": "audio-failed", @@ -275,8 +275,7 @@ "BinariesRoutePrefix": "https://github.com/Azure-Samples/cognitive-services-speech-sdk/releases/download/ingestion-", "StartTranscriptionByTimerBinary": "[concat(variables('BinariesRoutePrefix'), variables('Version'), '/StartTranscriptionByTimer.zip')]", "StartTranscriptionByServiceBusBinary": "[concat(variables('BinariesRoutePrefix'), variables('Version'), '/StartTranscriptionByServiceBus.zip')]", - "FetchTranscriptionBinary": "[concat(variables('BinariesRoutePrefix'), variables('Version'), '/FetchTranscription.zip')]", - "DatabaseBacpac": "[concat(variables('BinariesRoutePrefix'), variables('Version'), '/IngestionClient.bacpac')]" + "FetchTranscriptionBinary": "[concat(variables('BinariesRoutePrefix'), variables('Version'), '/FetchTranscription.zip')]" }, "resources": [ { @@ -435,23 +434,6 @@ "dependsOn": [ "[variables('DatabaseName')]" ] - }, - { - "name": "Import", - "condition": "[variables('UseSqlDatabase')]", - "type": "extensions", - "apiVersion": "2014-04-01", - "dependsOn": [ - "[resourceId('Microsoft.Sql/servers/databases', variables('SqlServerName'), variables('DatabaseName'))]" - ], - "properties": { - "operationMode": "Import", - "storageKey": "?", - "storageKeyType": "StorageAccessKey", - "administratorLogin": "[parameters('SqlAdministratorLogin')]", - "administratorLoginPassword": "[parameters('SqlAdministratorLoginPassword')]", - "storageUri": "[variables('DatabaseBacpac')]" - } } ] }, diff --git a/samples/ingestion/ingestion-client/Setup/ArmTemplateRealtime.json b/samples/ingestion/ingestion-client/Setup/ArmTemplateRealtime.json index ae09fbc3b..8e1aba06a 100644 --- a/samples/ingestion/ingestion-client/Setup/ArmTemplateRealtime.json +++ b/samples/ingestion/ingestion-client/Setup/ArmTemplateRealtime.json @@ -123,7 +123,7 @@ } }, "variables": { - "Version": "v1.0.1", + "Version": "v2.0.0", "AudioInputContainer": "audio-input", "AudioProcessedContainer": "audio-processed", "ErrorFilesOutputContainer": "audio-failed", diff --git a/samples/ingestion/ingestion-client/Setup/IngestionClient.bacpac b/samples/ingestion/ingestion-client/Setup/IngestionClient.bacpac deleted file mode 100644 index aa2949bb82bd34fc69731477e022c15fadfc4027..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 4595 zcmbtYc|26>8=tIEmZX%>q?_!s8>5*ij3q;1MoNh@GY7*gnuR;sC|7$bTBt0MijYWd z))rYRB@|gp8zprqQTUy)-RioZ`^WFhoZ&p{`+T40d7t-lT&c2hnlKn_Cain21?|Pn z*IC7iFqmW;45kFrfpPgPkYoNkmjgey0A`pZ75wMbg_ox_dP*A;+(|5Chz4l?fC*-tZXcR2%L&-qC?o-%r=O;LwDk2(}# zu7Uis`2}aMcSqB~@|o7|pSM5v^{g_Dy(iQ%Yn8cJrlEGcNI_rb>OvgFxaO*?(J+3n zXu*J?pv`&V9*Ja2ir%Ygb%_>1vdGe2u049igOkeT?j^3(#w&`fbk%%D-!-t$hAYzj zo-OrJqE)XwKGN|ngwe8!bRlmb;+~ry7bY zQTri?s#=)=WF`4-OXc79GO*w*ZBvw}sM1Q%flM{8YR~m@4mtF_+CH}_OHpMz8m%k! zGFK($mZ_bkNah~jra9tp{rwEjqcbUKJC;Y#D4hAh4xfYdeDzOP7vItw<$2=~8Bcd^ zPpH~1j!%5ogI$?@_l~$Px!1t*>V8i6xBk!t8`6)zoJosw(Tlmdt>eM>;iJis%7xxv z2m{v{j#{+fYp8FU?Yo<`gXfVVZ#;dyS9wn-N-h2xF6snJtMrMk*b>7 ztXI{5U61>HX0wUSQ4k{hNph(+20%Oa(L0_$TxASiSOrNK0TL9BEI!g9juARf29E0$Wm`-X>=vCfmsz`=t2 z_XZA82dn0YLr)Dn&MClMFG@IenQol;G<`Wr>1EE6Ca1xs4~X>9y)-P$ZKV;yWw#+!S2Ci7?azMgws_k8nERa?x4`L8W$ z`!78Tp&MhhQJN-MPB~STo|`YCs2o|FI~1#Wl^Q*$LFT@?|#;BzA0$g^BhSU{P}0InsLdmY?+2n8II_io00Y|nhpwMxWL)>Zem z*-OJBld)zRZ{Dgqmgr{K-Htkxn_O<-KcJ!;5}}%Lg1da>i{GPS+TvFSc?6%QIOVVK zta7oMQ&@f5gr8I4!M?DlFgcHFQblgWM>_)D+21zDZ|&F2pCPWWzP6yJ%gxHqsdb;x z=MAgy^BVJ5Ewz`q)g1P&=K6KCnKz75(^LgCoJ+59O{}9zf12J6YGbc8so(c(*xTw- zg?$-&KGd*1giZ~b^-qR%OK(bc2CHVXbvg59%I2u#ev*GwqC?3JOQ&04w0iwz<`=H+ zksUgxRC>Lnc;}ng2a>+w@=vze%U8v(KXZ5erdeOt&)B^~yY@P&)`FlYnYAi2++=Q* zv3R3S)G7H}Q6(QrS0nJbbgD46eYdtFcvicz8nJ9|Muh2z969y|SN=#NUX}72sv&y& z!~G{`2H(`7d%5UimJxXx2XB&ghem&5tGFHx1}g zV=OL&zECYP|EN}cWr(fOdDcF+``ejTU~ReCfqj{VyW%^SAgaUJhJ5!$I`*xZjj>pr zuiA=Vl8E_3-_$#uHa`#gYQt_`*A>6B0+7o!3hvn&5!96;r%>6d;67Tcf_Y(cg0`=% zfAH#+UjGvqyJj7EN3FKB!$&RdJm_t7N$9&JsR3ezmyc*>epl%2a@b|2+G%%cWE;;? zVfYWmCxb+X!?JP(A2y_a4#=Xe$G<_U{<+cFJH$kwnL5JCX|{_vunZV}85?LyIeac! zWz?%U>nYmfUV3xLU0z9Pdj4oZ*2)V*qe;mRBSlB5JAZ4o`+TkGc=xuaqEY|77c)wk zt)5E@mYfD3vUd#Sys!IyYv6YKh@$IH^S_MO^XW1);cKB=4VwQHfawB?02Uwu{_7Ng zJM5ym3#l17V9=J_rMG#b%lFk0DnWSdfrsv?O6MIj+_GNZS}KuWRDYl6hwdv*L~Ll` zQh8w??}`_`GD@L@R(qta@bA~|X1N^LMvhsQemmvqZ%D)5j}&kEQZF$D9D>l_8-f%IOQi( zB8?K`D-evD$E!6)>{+`PZyK5LbI?7>bsf=Az}e6}7Ycf#EuSX>c_JUWBp4KW|5xD4 z4pE`N)uXZ>-`7*{Uch{I!m4%DiZ|QYUCY68ug#mUilu&~w6i!yF?-RQ3*%Ey0Rim$Hq)YqO zb`RZ0it`yptU*{tHofqYmiTmTG0>+|556-ql(ot22ux`xc+s<18k;f{2@KW)9q3d7 z3P{ZgFqrgKhGJVQVEeOq{}obiVJUH?^wbvu3WRJvkED+>N9w~t9+S^v^ZZHrVv(Pj zrT$8?iZT%@k#VDNNP#CL=?92J!Bz-_kQo4S0iijU%@pv3d_R#nlg~v6w{V1@05XeU z0Zar96tDpfI}8v(P7r7$3XMRb^&zWpIMEeg1_Az{tAHO23PfyBI7WsT>&Dka#+W0e zA0lFsAXS6-((D0jp3v_1V7@>EvM5mTl0^bBNJLD_noy&O1R%hjur=xIZ+Gx<{u+P_ zlJun}^~vbz7w~U7U@(U-;X-vKU4?+n5R3Q%p*|TxcG@ry(=3RHm>w14S_q>t5GO%e z&4L`UMG`U&$G`&&B;E{Q;w{XuSdd_5$-*MdKnpAi$wU#DOb|sxOo>l{0SH87G!l(6 zLlVr;XgUgIg~M2(p^ynkOB5Pwf<#*(A?#@)so;c`9gjsuB9V}e^h-w~Ara_n4$-6+ z5g`?rkcINf5;H|oJjpIoB|_W6focLO4|AxjOyLvRAfa&MAeq8##T=1X0Frp1SR?>A zrf^p=gTrQ0K?$871oB7>B*qWthxbFFSU4nr0U*SFaP+h9$+{V{jzi%I=IAN$N#4&I zw6U5d|GyPJR2)Cm#sa}J5)I84Gd{xrF&DZt<~H)ptY7=#Gi(4FpL1UG}Dq-BYQ z;5N5}t0*fuiFt4|7A{RC91Ud9+h}3A7gkh~S2{Mi3iCfd6gPmM?&MXRM&0 z4wtrR2q>6zJ^{&EMj*(9YFW$^{m?Da&R_sS&{hDBRUp72V~L1qsN+_kxF>mQgQc5* zkPKDhucRqid%gf<`}3e|Lp>yf3V_WMN$pQdpVD#xgd$J?DM@Yr{V>IM6|lL0K=QLy zoHR+((5JL$Y;G_IwB>WeTpk&Vfl&I9FeOF@7#xrcSsVXnoDL=XSM(-P9!ofJeD8vq zOT-q6*vv_Qi8Q`gzyv4pp|Qgt(^xcwz%+~rS7UzN`JrQ(qVq)nhYt9Yp=O+(A=QE! zb!_ka#Xr$D0AvOU#oVdoOcLN80yaq>G4-}UpKOW7p`nXlO~GR*C<5M=g0Ur_>}~Ou z1RH{-J;efLX^X<(Z7_H|#o7Xi#ba!7I2#n+#s+I?heF|@y?VmgI3QDx54<_|wp5&f$6^Fiqc4e6VdUyn_{D*t@o r_)+;Yq&&^>W8A>MP4yoQ)GEP#nDMSud1&s#U~2m1R_UHbGt