I have the following script:
import time
import azure.cognitiveservices.speech as speechsdk
import logging
# Configure logging
logging.basicConfig(level=logging.INFO)
# The phrase your keyword recognition model triggers on.
KEYWORD = "KEYWORD"
def recognize_keyword_from_wav_file(wav_file_path):
"""Performs keyword-triggered speech recognition with a WAV file."""
global true_positives, false_positives, false_negatives
try:
speech_config = speechsdk.SpeechConfig(subscription='xyz', region='westeurope')
model = speechsdk.KeywordRecognitionModel("./keyword.table")
audio_config = speechsdk.audio.AudioConfig(filename=wav_file_path)
speech_recognizer = speechsdk.SpeechRecognizer(speech_config=speech_config, audio_config=audio_config)
except Exception as e:
logging.error(f"Failed to initialize speech recognizer: {e}")
return
def recognizing_cb(evt):
"""Callback for recognizing event."""
try:
if evt.result.reason == speechsdk.ResultReason.RecognizingKeyword:
logging.info(f'RECOGNIZING KEYWORD: {evt}')
elif evt.result.reason == speechsdk.ResultReason.RecognizingSpeech:
logging.info(f'RECOGNIZING: {evt}')
except Exception as e:
logging.error(f"Error in recognizing callback: {e}")
def recognized_cb(evt):
"""Callback for recognized event."""
try:
if evt.result.reason == speechsdk.ResultReason.RecognizedKeyword:
logging.info(f'RECOGNIZED KEYWORD: {evt}')
elif evt.result.reason == speechsdk.ResultReason.RecognizedSpeech:
logging.info(f'RECOGNIZED: {evt}')
except Exception as e:
logging.error(f"Error in recognized callback: {e}")
try:
speech_recognizer.recognizing.connect(recognizing_cb)
speech_recognizer.recognized.connect(recognized_cb)
speech_recognizer.session_started.connect(lambda evt: logging.info(f'SESSION STARTED: {evt}'))
speech_recognizer.session_stopped.connect(lambda evt: logging.info(f'SESSION STOPPED {evt}'))
speech_recognizer.canceled.connect(lambda evt: logging.info(f'CANCELED {evt}'))
speech_recognizer.start_keyword_recognition(model)
logging.info(f'Say something starting with "{KEYWORD}" followed by whatever you want...')
speech_recognizer.recognize_once()
speech_recognizer.stop_keyword_recognition()
except Exception as e:
logging.error(f"Error during speech recognition: {e}")
# Example usage:
if __name__ == "__main__":
wav_file_path = "./output01.wav"
recognize_keyword_from_wav_file(wav_file_path)
This is only giving me RecognizedSpeech
but never RecognizedKeyword
. This only happens when using wave files as input to the keyword recognition model instead of the default microphone stream by: audio_config = speechsdk.audio.AudioConfig(use_default_microphone=True)