I’ve been trying to use the recognizeMicrophone service to provide live transcriptions, but so far I’ve been unable to get it to work. I’ve done all the necessary setup, such as setting my credentials, but as soon as I call the recognizeMicrophone function by pressing a “Start Recording” button in my HTML, I get this error:
stream err WebSocket connection error: WebSocket connection error
at socket.onerror (/Users/me/electron_tts_app/node_modules/.pnpm/[email protected]/node_modules/watson-speech/speech-to-text/recognize-stream.js:216:15)
I’m running this in an Electron app, and this is my preload.js file:
require('dotenv').config();
const fs = require('node:fs');
const { IamAuthenticator, IamTokenManager } = require('ibm-watson/auth');
const SpeechToTextV1 = require('ibm-watson/speech-to-text/v1');
const recognizeMicrophone = require('watson-speech/speech-to-text/recognize-microphone');
const { contextBridge } = require('electron');

console.log('running');

// Lazily fetches an IAM access token from the API key and caches it for reuse.
const getIBMWatsonClientToken = (() => {
  const tokenManager = new IamTokenManager({
    apikey: process.env.SPEECH_TO_TEXT_IAM_APIKEY
  });
  let cachedToken = null;
  return async () => {
    if (cachedToken) return cachedToken;
    cachedToken = await tokenManager.getToken();
    console.log('got token as', cachedToken);
    fs.appendFile('logs.txt', `Got token: ${JSON.stringify(cachedToken)}\n`, function () {
      console.log('arguments', arguments);
    });
    return cachedToken;
  };
})();

const setup = () => {
  document.getElementById('start').onclick = async function () {
    const stream = recognizeMicrophone({
      accessToken: await getIBMWatsonClientToken(), // you need to generate a token from your API key
      token: await getIBMWatsonClientToken(),       // you need to generate a token from your API key
      objectMode: true,     // enables object mode to receive objects instead of strings
      extractResults: true, // simplifies the response
      format: false         // prevents formatting on the client side
      // model
    });
    stream.on('data', (data) => {
      console.log('stream data', data);
      document.getElementById('transcript').value += data.alternatives[0].transcript + '\n';
    });
    stream.on('error', function (err) {
      console.error('stream err', err);
    });
    document.getElementById('stop').onclick = function () {
      stream.stop();
    };
  };
};

contextBridge.exposeInMainWorld('electronAPI', {
  sayHello: () => console.log('hello world'),
  setup,
  trySpeechToText: () => {
    const speechToText = new SpeechToTextV1({
      authenticator: new IamAuthenticator({ apikey: process.env.SPEECH_TO_TEXT_IAM_APIKEY }),
      serviceUrl: process.env.SPEECH_TO_TEXT_URL
    });
    speechToText.recognize({
      audio: fs.createReadStream(__dirname + '/../test-recording.m4a'),
      contentType: 'audio/mp4',
    })
      .then(r => {
        console.log('speech2txt response', r);
      });
  }
});
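For context, the renderer HTML is essentially this (a trimmed sketch; the ids match the document.getElementById calls above, and setup comes from the exposed electronAPI):

<button id="start">Start Recording</button>
<button id="stop">Stop</button>
<textarea id="transcript"></textarea>
<script>
  // wires up the click handlers defined in preload.js
  window.electronAPI.setup();
</script>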
You might notice there’s also a trySpeechToText function exposed to the frontend. When I try that, I see an error in the console; inspecting the network tab, this is the full response for the trySpeechToText call:
{
  "error": "Stream was 15 bytes but needs to be at least 100 bytes.",
  "code": 400,
  "code_description": "Bad Request"
}
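Since the response says the stream was only 15 bytes, I can at least verify the file on disk isn’t the problem with something like this (a minimal sketch, reusing the same relative path passed to createReadStream):

const fs = require('node:fs');
const path = require('node:path');
// confirm the recording is actually larger than the 15 bytes the service reports
const audioPath = path.join(__dirname, '..', 'test-recording.m4a');
console.log('audio file size (bytes):', fs.statSync(audioPath).size);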
What could be causing both the recognizeMicrophone and speechToText modules to fail?