I really don’t understand why: with no changes to the code at all, the service has simply stopped working today, and it doesn’t report any error. It starts the transcription job but then immediately cancels it without transcribing anything.
The code (customized from https://learn.microsoft.com/en-us/azure/ai-services/speech-service/get-started-stt-diarization?tabs=windows&pivots=programming-language-python, but it’s pretty much the same):
import logging
import time
import azure.cognitiveservices.speech as speechsdk
import os
log = logging.getLogger(__name__)
class ConversationCb:
    """Event handlers for a conversation transcriber that accumulate a transcript.

    Each handler logs the event it receives. Recognized utterances are
    appended to ``self.text`` as a ``Speaker: <id>`` line followed by the
    utterance text on its own line.
    """

    def __init__(self):
        # Transcript accumulated so far, built up by the transcribed callback.
        self.text = ''

    def conversation_transcriber_canceled_cb(self, evt: speechsdk.SessionEventArgs):
        """Log a canceled event, including the cancellation reason when available."""
        log.info(f'transcriber canceled event with args={evt}')
        print('Canceled event')
        # The event's repr does not include why the session was canceled, so
        # read the cancellation details explicitly; without this a failure
        # (bad key, wrong region, quota, network, ...) is invisible in the logs.
        details = getattr(evt, 'cancellation_details', None)
        if details is None:
            return
        reason = getattr(details, 'reason', None)
        code = getattr(details, 'code', None)
        error_details = getattr(details, 'error_details', None)
        if reason == speechsdk.CancellationReason.Error:
            log.error(
                'transcription canceled with error: code=%s error_details=%s',
                code, error_details,
            )
        else:
            # Non-error cancellations (e.g. the end of the input was reached)
            # are informational only.
            log.info('transcription canceled: reason=%s', reason)

    def conversation_transcriber_session_started_cb(self, evt: speechsdk.SessionEventArgs):
        """Log the session-started event."""
        log.info(f'transcriber session started event with args={evt}')

    def conversation_transcriber_session_stopped_cb(self, evt: speechsdk.SessionEventArgs):
        """Log the session-stopped event."""
        log.info(f'transcriber session stopped event with args={evt}')

    def conversation_transcriber_transcribed_cb(self, evt: speechsdk.SpeechRecognitionEventArgs):
        """Append a recognized utterance to the transcript, or report a no-match."""
        log.info(f'transcriber transcribed event with args={evt}')
        reason = evt.result.reason
        if reason == speechsdk.ResultReason.RecognizedSpeech:
            text = evt.result.text
            speaker_id = evt.result.speaker_id
            log.info(f'\tspeaker={speaker_id} - text="{text}"')
            # Newline escapes restored: the speaker label and the utterance
            # each go on their own line.
            self.text += f'Speaker: {speaker_id}\n{text}\n'
        elif reason == speechsdk.ResultReason.NoMatch:
            print('\tNOMATCH: Speech could not be TRANSCRIBED: {}'.format(evt.result.no_match_details))
from pathlib import Path
def recognize_from_file(filename, speech_key, speech_region):
    """Transcribe an audio file with speaker diarization and return the transcript.

    Args:
        filename: Path of the audio file to transcribe.
        speech_key: Subscription key passed to ``speechsdk.SpeechConfig``.
        speech_region: Service region passed to ``speechsdk.SpeechConfig``.

    Returns:
        The text accumulated by ``ConversationCb`` while the session ran.
        It is an empty string when no utterance was recognized, which
        includes sessions that were canceled before producing any result.
    """
    # Local import so this function does not depend on a file-level change.
    import threading

    # Report whether the input file exists (diagnostic output only; the
    # function continues either way).
    if Path(filename).is_file():
        print("Il file esiste.")
    else:
        print("Il file non esiste.")

    log.info(f'recognize text from file={filename}')
    speech_config = speechsdk.SpeechConfig(subscription=speech_key, region=speech_region)
    speech_config.speech_recognition_language = "it-IT"
    # Uncomment to make the SDK write its own diagnostic log to a file:
    # speech_config.set_property(speechsdk.PropertyId.Speech_LogFilename, "speech.log")
    audio_config = speechsdk.audio.AudioConfig(filename=filename)
    conversation_transcriber = speechsdk.transcription.ConversationTranscriber(
        speech_config=speech_config, audio_config=audio_config)

    # Connect callbacks to the events fired by the conversation transcriber.
    cb = ConversationCb()
    try:
        conversation_transcriber.transcribed.connect(cb.conversation_transcriber_transcribed_cb)
    except Exception as e:
        log.error(f'Errore durante la connessione del callback transcribed: {e}')
    conversation_transcriber.session_started.connect(cb.conversation_transcriber_session_started_cb)
    conversation_transcriber.session_stopped.connect(cb.conversation_transcriber_session_stopped_cb)
    conversation_transcriber.canceled.connect(cb.conversation_transcriber_canceled_cb)

    # Set from the SDK's callback thread once the session has ended.
    finished = threading.Event()

    def stop_cb(evt: speechsdk.SessionEventArgs):
        log.info(f'transcriber session stopped | canceled event with args={evt}')
        finished.set()

    # Stop transcribing on either session stopped or canceled events.
    conversation_transcriber.session_stopped.connect(stop_cb)
    conversation_transcriber.canceled.connect(stop_cb)

    # Resolve the returned future so the start request has been issued
    # before we begin waiting.
    conversation_transcriber.start_transcribing_async().get()
    try:
        # Wait in short slices: wakes immediately when the event is set,
        # while still letting the main thread react to interrupts.
        while not finished.wait(0.5):
            pass
    finally:
        # Always stop, and wait for the stop to complete, so the transcriber
        # is not torn down while the stop is still in flight.
        conversation_transcriber.stop_transcribing_async().get()
    return cb.text
The logs:
Il file esiste.
2024-08-09 11:38:20,211:INFO:audio.transcriber:recognize text from file=C:\Users\SPARAP~1.GRU\AppData\Local\Temp\dfab30b3-48d5-4473-aea5-8e200b7ef2af_converted.wav
2024-08-09 11:38:20,236:INFO:audio.transcriber:transcriber session started event with args=SessionEventArgs(session_id=bdc0db19e1fc457d9a2c4e41bd3f38d9)
2024-08-09 11:38:21,500:INFO:audio.transcriber:transcriber canceled event with args=ConversationTranscriptionCanceledEventArgs(session_id=bdc0db19e1fc457d9a2c4e41bd3f38d9, result=ConversationTranscriptionResult(result_id=19547d1034264367865610d53464876b, speaker_id=, text=, reason=ResultReason.Canceled))
Canceled event
2024-08-09 11:38:21,500:INFO:audio.transcriber:transcriber session stopped | canceled event with args=ConversationTranscriptionCanceledEventArgs(session_id=bdc0db19e1fc457d9a2c4e41bd3f38d9, result=ConversationTranscriptionResult(result_id=19547d1034264367865610d53464876b, speaker_id=, text=, reason=ResultReason.Canceled))
2024-08-09 11:38:21,500:INFO:audio.transcriber:transcriber session stopped event with args=SessionEventArgs(session_id=bdc0db19e1fc457d9a2c4e41bd3f38d9)
2024-08-09 11:38:21,501:INFO:audio.transcriber:transcriber session stopped | canceled event with args=SessionEventArgs(session_id=bdc0db19e1fc457d9a2c4e41bd3f38d9)
2024-08-09 11:38:21,738:INFO:manager:recognized text=""
2024-08-09 11:38:21,744:INFO:manager:cleaned speaker text=""
I’m using it via Flask, through a service that I’m currently running locally.
The audio files are definitely not corrupted: I’m using the same samples I tested with when I wrote the code a couple of months ago, and everything worked fine back then.