I’m trying to build real-time speech-to-text transcription in Node.js using AWS Transcribe. I’m sure that the audio is transferred from React to Node.js through socket.io: I cross-verified this by creating a write stream and appending the audio chunks to a file, and the audio plays back correctly from that file. However, the AWS Transcribe service returns an empty response.
Tech stack:
- React.js
- Node.js
- Socket.io
- @aws-sdk/client-transcribe-streaming
Code:
const express = require("express");
const http = require("http");
const { Server } = require("socket.io");
const {
TranscribeStreamingClient,
StartStreamTranscriptionCommand,
} = require("@aws-sdk/client-transcribe-streaming");
const cors = require("cors");
// CORS policy handed to socket.io: wildcard origin, GET/POST only,
// Content-Type as the single allowed header, credentials flag enabled.
const socketCorsOptions = {
  origin: "*",
  methods: ["GET", "POST"],
  allowedHeaders: ["Content-Type"],
  credentials: true,
};

// Express application with the default `cors()` middleware applied to every route.
const app = express();
app.use(cors());

// A single HTTP server carries both the Express app and the socket.io endpoint.
const server = http.createServer(app);
const io = new Server(server, { cors: socketCorsOptions });

// Streaming Transcribe client shared by all socket connections.
const transcribeClient = new TranscribeStreamingClient({ region: "us-west-2" });
// Per-connection handler.
//
// Incoming event:  "audio"      - a binary audio chunk from the client.
// Outgoing events: "transcript" - first alternative of every transcript result.
//                  "error"      - message of any error raised while streaming.
//
// Each connection opens its own streaming transcription request and feeds it
// from an in-memory queue of the chunks received so far.
io.on("connection", (socket) => {
  console.log("Client connected");

  // Chunks received from the client that have not been handed to the SDK yet.
  const audioQueue = [];
  // Set to false on disconnect so the audio generator below can terminate.
  let isClientConnected = true;

  socket.on("audio", (chunk) => {
    audioQueue.push(new Uint8Array(chunk));
  });

  socket.on("disconnect", () => {
    console.log("Client disconnected");
    isClientConnected = false;
  });

  // Async generator consumed as the request's AudioStream. It yields queued
  // chunks in arrival order and polls every 100 ms while the queue is empty.
  // Once the client has disconnected and the queue is drained it returns, so
  // the request stream finishes instead of polling forever.
  const audioStream = async function* () {
    while (true) {
      if (audioQueue.length > 0) {
        yield { AudioEvent: { AudioChunk: audioQueue.shift() } };
      } else if (isClientConnected) {
        await new Promise((resolve) => setTimeout(resolve, 100)); // Wait for more data
      } else {
        return;
      }
    }
  };

  // NOTE(review): the request declares 16 kHz "pcm" audio. Presumably the
  // chunks sent by the client must really be raw PCM at that sample rate; if
  // the browser sends a container/compressed format instead, an empty
  // `Results` array would be a plausible outcome - confirm against the client.
  const command = new StartStreamTranscriptionCommand({
    LanguageCode: "en-US",
    MediaSampleRateHertz: 16000,
    MediaEncoding: "pcm",
    AudioStream: audioStream(),
  });

  // Sends the command and relays every transcript result to the client.
  // Errors are logged and forwarded on the "error" event, never rethrown.
  const startTranscription = async () => {
    try {
      const response = await transcribeClient.send(command);
      console.log(response.$metadata);
      for await (const event of response.TranscriptResultStream) {
        console.log(event);
        if (event.TranscriptEvent) {
          const transcriptResults = event.TranscriptEvent.Transcript.Results;
          transcriptResults.forEach((result) => {
            if (result.Alternatives.length > 0) {
              const transcriptText = result.Alternatives[0].Transcript;
              socket.emit("transcript", transcriptText);
            }
          });
        }
      }
    } catch (error) {
      console.error("Error streaming transcription:", error);
      socket.emit("error", error.message);
    }
  };

  // Intentionally not awaited: startTranscription handles its own errors.
  void startTranscription();
});
// Logs a confirmation line once the HTTP server has started listening.
function announceListening() {
  console.log("Server is running on port 4000");
}

// Begin accepting HTTP and socket.io connections on port 4000.
server.listen(4000, announceListening);
Output:
Server is running on port 4000
Client connected
{
httpStatusCode: 200,
requestId: 'dfbe7ba4-2340-48bf-8d68-d220cd13be2d',
extendedRequestId: undefined,
cfId: undefined,
attempts: 1,
totalRetryDelay: 0
}
{ TranscriptEvent: { Transcript: { Results: [] } } }
Can you help me solve this issue?
Is there a different approach to achieve my goal?