I’m building a live mic speech-to-text transcription on Azure using .NET. However, a single sentence results in multiple repetitive sentences. I need help identifying the issue that causes repetitive output. If I speak in the mic saying:
“Hello my name is John”,
the code produces the following output:
“Hello my name is John Hello my name is John Hello my name is John Hello my name is John Hello my name is John Hello my name is John Hello my name is John Hello my name is John Hello my name is John Hello my name is John Hello my name is John Hello my name is John Hello my name is John Hello my name is John Hello my name is John Hello my name is John Hello my name is John Hello my name is John Hello my name is John Hello my name is John Hello my name is John Hello my name is John Hello my name is John Hello my name is John”
What I want is live transcription of a sentence that appears once without repetition, exactly how it works on Azure’s Speech Studio – Real-time speech to text.
Here’s my code:
Program.cs:
using Microsoft.AspNetCore.Hosting;
using Microsoft.Extensions.Configuration;
using Microsoft.Extensions.DependencyInjection;
using Microsoft.Extensions.Hosting;
using Microsoft.Extensions.Logging;
using System;
using System.Linq;
using [ ].Hubs;
using [ ].Services;
using [ ].Data;
public class Program
{
public static void Main(string[] args)
{
CreateHostBuilder(args).Build().Run();
}
public static IHostBuilder CreateHostBuilder(string[] args) =>
Host.CreateDefaultBuilder(args)
.ConfigureWebHostDefaults(webBuilder =>
{
webBuilder.UseUrls("http://localhost:5001");
webBuilder.ConfigureServices((context, services) =>
{
services.AddControllers();
services.AddSignalR();
// Configure CORS
services.AddCors(options =>
{
options.AddDefaultPolicy(builder =>
{
builder.WithOrigins("http://localhost:3000")
.AllowAnyHeader()
.AllowAnyMethod()
.AllowCredentials();
});
});
services.AddSingleton<SpeechService>();
services.AddSingleton<TranscriptionService>();
// Add DbContext and hosted service
services.AddDbContext<CosmosDbContext>();
services.AddHostedService<CosmosDbTestService>();
})
.Configure((context, app) =>
{
if (context.HostingEnvironment.IsDevelopment())
{
app.UseDeveloperExceptionPage();
}
// Enable CORS
app.UseCors();
app.UseRouting();
app.UseEndpoints(endpoints =>
{
endpoints.MapControllers();
endpoints.MapHub<TranscriptionHub>("/transcriptionHub");
});
});
})
.ConfigureLogging(logging =>
{
logging.ClearProviders();
logging.AddConsole();
});
}
SpeechService.cs (/Service):
using Microsoft.CognitiveServices.Speech;
using Microsoft.CognitiveServices.Speech.Audio;
namespace [ ].Services
{
public class SpeechService
{
private readonly SpeechRecognizer _speechRecognizer;
private bool isRecognizing = false;
public event Action<string>? OnRecognizing;
public event Action<string>? OnRecognized;
public SpeechService()
{
var subscriptionKey = Environment.GetEnvironmentVariable("AZURE_SPEECH_KEY");
var region = Environment.GetEnvironmentVariable("AZURE_SPEECH_REGION");
if (string.IsNullOrEmpty(subscriptionKey) || string.IsNullOrEmpty(region))
{
throw new InvalidOperationException("Azure Speech Service key and region must be provided via environment variables.");
}
var speechConfig = SpeechConfig.FromSubscription(subscriptionKey, region);
speechConfig.SpeechRecognitionLanguage = "de-DE";
speechConfig.EnableDictation(); // Enable dictation mode for explicit punctuation
var audioConfig = AudioConfig.FromDefaultMicrophoneInput();
_speechRecognizer = new SpeechRecognizer(speechConfig, audioConfig);
_speechRecognizer.Recognizing += (s, e) =>
{
if (e.Result.Reason == ResultReason.RecognizingSpeech)
{
OnRecognizing?.Invoke(e.Result.Text);
}
};
_speechRecognizer.Recognized += (s, e) =>
{
if (e.Result.Reason == ResultReason.RecognizedSpeech)
{
OnRecognized?.Invoke(e.Result.Text);
}
};
_speechRecognizer.Canceled += (s, e) =>
{
isRecognizing = false;
Console.WriteLine($"Recognition canceled: {e.Reason}, {e.ErrorDetails}");
};
_speechRecognizer.SessionStopped += (s, e) =>
{
isRecognizing = false;
Console.WriteLine($"Session stopped: {e.SessionId}");
};
}
public async Task StartRecognitionAsync()
{
if (!isRecognizing)
{
isRecognizing = true;
await _speechRecognizer.StartContinuousRecognitionAsync().ConfigureAwait(false);
}
}
public async Task StopRecognitionAsync()
{
if (isRecognizing)
{
await _speechRecognizer.StopContinuousRecognitionAsync().ConfigureAwait(false);
isRecognizing = false;
}
}
}
}
TranscriptionService.cs (/Service):
using Microsoft.AspNetCore.SignalR;
using System.Collections.Concurrent;
using [ ].Hubs;
namespace [ ].Services
{
public class TranscriptionService
{
private readonly IHubContext<TranscriptionHub> _hubContext;
private readonly ConcurrentDictionary<string, string> _connections = new ConcurrentDictionary<string, string>();
public TranscriptionService(IHubContext<TranscriptionHub> hubContext)
{
_hubContext = hubContext;
}
public void AddConnection(string connectionId)
{
_connections[connectionId] = connectionId;
}
public void RemoveConnection(string connectionId)
{
_connections.TryRemove(connectionId, out _);
}
public async Task BroadcastRecognizing(string text)
{
foreach (var connectionId in _connections.Keys)
{
await _hubContext.Clients.Client(connectionId).SendAsync("ReceiveRecognizing", text);
}
}
public async Task BroadcastRecognized(string text)
{
foreach (var connectionId in _connections.Keys)
{
await _hubContext.Clients.Client(connectionId).SendAsync("ReceiveRecognized", text);
}
}
}
}
TranscriptionHub.cs (/Hub):
using Microsoft.AspNetCore.SignalR;
using [ ].Services;
namespace [ ].Hubs
{
public class TranscriptionHub : Hub
{
private readonly SpeechService _speechService;
private readonly TranscriptionService _transcriptionService;
public TranscriptionHub(SpeechService speechService, TranscriptionService transcriptionService)
{
_speechService = speechService;
_transcriptionService = transcriptionService;
}
public override async Task OnConnectedAsync()
{
_transcriptionService.AddConnection(Context.ConnectionId);
_speechService.OnRecognizing += HandleRecognizing;
_speechService.OnRecognized += HandleRecognized;
await base.OnConnectedAsync();
}
public override async Task OnDisconnectedAsync(Exception? exception)
{
_transcriptionService.RemoveConnection(Context.ConnectionId);
_speechService.OnRecognizing -= HandleRecognizing;
_speechService.OnRecognized -= HandleRecognized;
await base.OnDisconnectedAsync(exception);
}
private async void HandleRecognizing(string text)
{
await _transcriptionService.BroadcastRecognizing(text);
}
private async void HandleRecognized(string text)
{
await _transcriptionService.BroadcastRecognized(text);
}
public async Task StartTranscription()
{
await _speechService.StartRecognitionAsync();
}
public async Task StopTranscription()
{
await _speechService.StopRecognitionAsync();
}
}
}
ecobiz is a new contributor to this site. Take care in asking for clarification, commenting, and answering.
Check out our Code of Conduct.
1