I have a simple microphone component that:
- starts recording the user's voice on click
- stops recording when clicked again
- transcribes the recording into text.
Everything works fine on desktop web, but stopRecording does not work on mobile: the component never reaches setTranscribing(false). I'm struggling to pin down the exact issue because I can't see the error log on my phone.
Microphone.tsx
'use client';
import React, { useEffect, useRef, useState } from 'react';
import { CircleStopIcon, LoaderCircleIcon, MicIcon } from 'lucide-react';
import { toast } from 'sonner';
import { transcribeSpeech } from '~/api/openai/actions';
const Microphone = ({
  stage,
  updateAnswerAndGoToNextStage,
  setAnswer,
  setShowInputMode,
}: {
  stage: number;
  updateAnswerAndGoToNextStage: (text: string) => void;
  setAnswer: React.Dispatch<React.SetStateAction<string>>;
  setShowInputMode: React.Dispatch<React.SetStateAction<boolean>>;
}) => {
  const streamRef = useRef<MediaStream | null>(null);
  const [recording, setRecording] = useState(false);
  const [transcribing, setTranscribing] = useState(false);
  const mediaRecorderRef = useRef<MediaRecorder | null>(null);
  const audioChunksRef = useRef<Blob[]>([]);
  const [isClient, setIsClient] = useState(false);

  useEffect(() => {
    setIsClient(true);
  }, []);
  const startRecording = async () => {
    try {
      const stream = await navigator.mediaDevices.getUserMedia({ audio: true });
      streamRef.current = stream;
      mediaRecorderRef.current = new MediaRecorder(stream);
      audioChunksRef.current = [];
      mediaRecorderRef.current.ondataavailable = (event) => {
        audioChunksRef.current.push(event.data);
      };
      mediaRecorderRef.current.start();
      setRecording(true);
      setShowInputMode(false);
    } catch (error) {
      console.error('Error starting recording:', error);
      toast.error('An unexpected error occurred. Please try again.');
    }
  };
  const stopRecording = async () => {
    if (mediaRecorderRef.current) {
      mediaRecorderRef.current.stop();
      setRecording(false);
      setTranscribing(true);
      // Stop all tracks in the stream
      if (streamRef.current) {
        streamRef.current.getTracks().forEach((track) => track.stop());
      }
      mediaRecorderRef.current.onstop = async () => {
        const audioBlob = new Blob(audioChunksRef.current, {
          type: 'audio/webm',
        });
        try {
          const reader = new FileReader();
          reader.onload = async (e) => {
            if (e.target && e.target.result) {
              const base64Audio = (e.target.result as string).split(',')[1];
              if (base64Audio) {
                // Debug toast, truncated so it stays readable on screen
                toast.success(`Base64 audio: ${base64Audio.slice(0, 32)}…`);
                const text = await transcribeSpeech(base64Audio);
                toast.success(`Transcription result: ${text}`);
                setTranscribing(false);
                setShowInputMode(true);
                updateAnswerAndGoToNextStage(text);
              } else {
                toast.error('No base64 audio found');
                setTranscribing(false);
              }
            } else {
              toast.error('FileReader result is null');
              setTranscribing(false);
            }
          };
          reader.readAsDataURL(audioBlob);
        } catch (error) {
          console.error('Transcription error:', error);
        }
      };
    }
  };
  return (
    <div className="flex flex-col items-center">
      {recording && !transcribing && (
        <button
          className="mb-4 mt-4 flex h-24 w-24 items-center justify-center rounded-full bg-gray-200 transition-colors duration-200 ease-in-out hover:cursor-pointer hover:bg-gray-100"
          onClick={stopRecording}
        >
          <CircleStopIcon className="h-24 w-24 text-gray-600" />
        </button>
      )}
      {!recording && transcribing && (
        <button
          disabled
          className="cursor-not-allowed mb-4 mt-4 flex h-24 w-24 items-center justify-center rounded-full bg-gray-200"
        >
          <LoaderCircleIcon className="h-20 w-20 animate-spin text-gray-600" />
        </button>
      )}
      {!recording && !transcribing && isClient && (
        <button
          className="mb-5 mt-4 flex h-24 w-24 items-center justify-center rounded-full bg-gray-200 transition-colors duration-200 ease-in-out hover:cursor-pointer hover:bg-gray-100"
          onClick={startRecording}
        >
          <MicIcon className="h-16 w-16 text-gray-600" />
        </button>
      )}
      <p className="text-lg">
        {!recording && !transcribing && 'Click to respond'}
        {recording && 'Click again to submit'}
        {!recording && transcribing && 'Transcribing...'}
      </p>
    </div>
  );
};

export default Microphone;
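I haven't confirmed this yet, but two things look suspicious to me: onstop is assigned after stop() has already been called (so the stop event could in principle fire before the handler exists), and the MIME type is hard-coded to audio/webm, which as far as I know iOS Safari does not record (it typically produces audio/mp4). Below is a sketch of what I'm planning to try inside the same component: detect a supported container with MediaRecorder.isTypeSupported, attach onstop before stopping, and wrap the transcription in try/finally so setTranscribing(false) always runs. blobToBase64 is a hypothetical helper I'd add:

// Hypothetical helper: wraps FileReader in a promise and returns the
// base64 payload without the "data:" URL prefix.
const blobToBase64 = (blob: Blob) =>
  new Promise<string>((resolve, reject) => {
    const reader = new FileReader();
    reader.onload = () => resolve((reader.result as string).split(',')[1] ?? '');
    reader.onerror = () => reject(reader.error);
    reader.readAsDataURL(blob);
  });

// In startRecording: ask the browser which container it can actually
// record instead of assuming webm.
const mimeType = ['audio/webm', 'audio/mp4'].find((t) =>
  MediaRecorder.isTypeSupported(t),
);
mediaRecorderRef.current = new MediaRecorder(
  stream,
  mimeType ? { mimeType } : undefined,
);

// Reworked stopRecording: handler is attached before stop(), and the
// finally block guarantees the spinner state gets cleared.
const stopRecording = () => {
  const recorder = mediaRecorderRef.current;
  if (!recorder) return;
  recorder.onstop = async () => {
    const audioBlob = new Blob(audioChunksRef.current, {
      type: recorder.mimeType || 'audio/mp4',
    });
    try {
      const base64Audio = await blobToBase64(audioBlob);
      const text = await transcribeSpeech(base64Audio);
      setShowInputMode(true);
      updateAnswerAndGoToNextStage(text);
    } catch (error) {
      toast.error(`Transcription failed: ${String(error)}`);
    } finally {
      setTranscribing(false);
    }
  };
  recorder.stop();
  setRecording(false);
  setTranscribing(true);
  streamRef.current?.getTracks().forEach((track) => track.stop());
};

If anyone can confirm whether the webm/mp4 mismatch is really what breaks this on iOS, that would help a lot.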
~/api/openai/actions
export async function transcribeSpeech(audioBase64: string) {
  try {
    const audioBuffer = Buffer.from(audioBase64, 'base64');
    const formData = new FormData();
    formData.append('file', new Blob([audioBuffer]), 'audio.webm');
    formData.append('model', 'whisper-1');
    formData.append('language', 'en');

    const response = await fetch(
      'https://api.openai.com/v1/audio/transcriptions',
      {
        method: 'POST',
        headers: {
          Authorization: `Bearer ${process.env.OPENAI_API_KEY}`,
        },
        body: formData,
      },
    );

    if (!response.ok) {
      throw new Error('Transcription failed');
    }

    const result = await response.json();
    return result.text;
  } catch (error) {
    console.error('Error transcribing speech:', error);
    throw error;
  }
}
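Since I can't read the console on my phone, I'm also thinking of making the server action surface why OpenAI rejected the upload, and of matching the uploaded filename to the container that was actually recorded. My understanding (unverified) is that the transcription endpoint infers the format from the filename extension, so sending mp4 data as audio.webm could fail. A sketch with a hypothetical mimeType parameter passed along from the client:

export async function transcribeSpeech(
  audioBase64: string,
  mimeType = 'audio/webm', // hypothetical extra parameter from the client
) {
  const audioBuffer = Buffer.from(audioBase64, 'base64');
  // Match the filename extension to the real container so the API can
  // parse the upload.
  const ext = mimeType.includes('mp4') ? 'mp4' : 'webm';
  const formData = new FormData();
  formData.append('file', new Blob([audioBuffer], { type: mimeType }), `audio.${ext}`);
  formData.append('model', 'whisper-1');
  formData.append('language', 'en');

  const response = await fetch('https://api.openai.com/v1/audio/transcriptions', {
    method: 'POST',
    headers: { Authorization: `Bearer ${process.env.OPENAI_API_KEY}` },
    body: formData,
  });

  if (!response.ok) {
    // Propagate the API's own explanation so the client toast can show it.
    throw new Error(`Transcription failed (${response.status}): ${await response.text()}`);
  }

  const result = (await response.json()) as { text: string };
  return result.text;
}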