I’m trying to get speech-to-text with the whisper module from OpenAI. The audio file has been saved in firebase storage. It works when I try to transcribe the file locally, however, when I try to get the file from Firebase storage it doesn’t work.
This is my code:
import React, {useCallback, useEffect, useState} from 'react';
import {ScrollView, StyleSheet, Text, TouchableOpacity, View} from 'react-native';
import Icon from 'react-native-vector-icons/MaterialCommunityIcons';
import {Audio} from 'expo-av';
import {storage} from './firebaseConfig';
import {getDownloadURL, listAll, ref, uploadBytes} from 'firebase/storage';
import OpenAI from "openai";
import RNFetchBlob from "rn-fetch-blob";
const RecordingStatuses = {
IDLE: 'idle', RECORDING: 'recording', PAUSED: 'paused',
};
const App = () => {
const [recording, setRecording] = useState(null);
const [recordingStatus, setRecordingStatus] = useState(RecordingStatuses.IDLE);
const [audioPermission, setAudioPermission] = useState(false);
const [recordingsList, setRecordingsList] = useState([]);
const [showRecordingsList, setShowRecordingsList] = useState(false);
const openai = new OpenAI({apiKey: 'sk-xxx'});
useEffect(() => {
(async () => {
const {granted} = await Audio.requestPermissionsAsync();
console.log('Permission Granted:', granted);
setAudioPermission(granted);
})();
}, []);
const startRecording = useCallback(async () => {
if (!audioPermission || recordingStatus !== RecordingStatuses.IDLE) return;
await Audio.setAudioModeAsync({
allowsRecordingIOS: true, playsInSilentModeIOS: true,
});
const newRecording = new Audio.Recording();
await newRecording.prepareToRecordAsync(Audio.RecordingOptionsPresets.HIGH_QUALITY);
await newRecording.startAsync();
setRecording(newRecording);
setRecordingStatus(RecordingStatuses.RECORDING);
}, [audioPermission, recordingStatus]);
const pauseRecording = useCallback(async () => {
if (recordingStatus === RecordingStatuses.RECORDING && recording) {
await recording.pauseAsync();
setRecordingStatus(RecordingStatuses.PAUSED);
}
}, [recordingStatus, recording]);
const stopRecording = useCallback(async () => {
if (recordingStatus === RecordingStatuses.RECORDING && recording) {
await recording.stopAndUnloadAsync();
const uri = recording.getURI(); // Lokale bestandspad
await uploadAudioToStorage(uri); // Nieuwe functie om audio te uploaden
setRecording(null);
setRecordingStatus(RecordingStatuses.IDLE);
}
}, [recordingStatus, recording]);
async function uploadAudioToStorage(uri) {
try {
const datumNu = new Date();
const tijdDatum = datumNu.getTime();
const filename = `Zorgplan_${tijdDatum}.mp3`;
const storageRef = ref(storage, `Opnames/${filename}`);
const blob = await fetch(uri).then(r => r.blob());
await uploadBytes(storageRef, blob, {contentType: 'audio/mp3'});
const downloadUrl = await getDownloadURL(storageRef);
console.log('File available at', downloadUrl);
} catch (error) {
console.error("Error uploading file and creating document:", error);
}
}
const fetchRecordingsFromStorage = async () => {
// Verondersteld dat `storage` correct is geïnitialiseerd in je firebaseConfig.
const listRef = ref(storage, 'Opnames/');
listAll(listRef)
.then(async (res) => {
const urls = await Promise.all(res.items.map((itemRef) => getDownloadURL(itemRef)));
console.log(urls); // Dit zijn de URLs naar je bestande n
setRecordingsList(urls.map((url, index) => ({url, id: index, filename: `Opname ${index + 1}`}))); // Opslaan in de staat
})
.catch((error) => {
console.error("Error fetching storage listings", error);
});
};
const handleTranscribe = async (recordingUrl) => {
console.log('Transcribing URL:', recordingUrl); // Verifieer of de URL correct wordt gelogd
if (!recordingUrl) {
console.error("Geen geldige URL doorgegeven aan handleTranscribe");
return;
}
await transcribeAudio(recordingUrl);
}
/* async function transcribeAudio(recordingUrl) {
let fileToRead = '';
const transcription = await openai.audio.transcriptions.create({
file: RNFetchBlob.fs.readStream(recordingUrl, "base64"),
model: "whisper-1",
});
console.log(transcription.text);
}
*/
async function transcribeAudio(recordingUrl) {
try {
const response = await fetch('https://api.openai.com/v1/audio/transcriptions', {
method: 'POST',
headers: {
'Authorization': "Bearer xxxxxxxxxxxxxxx",
'Content-Type': 'application/json',
},
body: JSON.stringify({
model: "whisper-1",
file: recordingUrl }),
});
const transcription = await response.json();
console.log("Transcription result:", transcription);
} catch (error) {
console.error("Error transcribing audio:", error);
}
}
/******************************DIT IS HTML VAN DE APP*******************************************************/
return (<View style={styles.container}>
<View style={styles.headerContainer}>
<Text style={styles.header}>...</Text>
</View>
<TouchableOpacity
style={styles.button}
onPress={() => {
fetchRecordingsFromStorage();
setShowRecordingsList(!showRecordingsList);
}}
>
<Text style={styles.buttonText}>Laad Opnames</Text>
</TouchableOpacity>
{showRecordingsList && (<ScrollView style={styles.recordingsList}>
{recordingsList.map((recording, index) => (<View key={index} style={styles.recordingItem}>
<Text style={styles.recordingText}>{recording.filename}</Text>
<TouchableOpacity
onPress={() => handleTranscribe(recording.url)}
style={{marginLeft: 10}}
>
<Icon name="microphone" size={30} color="#007bff"/>
</TouchableOpacity>
</View>))}
</ScrollView>)}
<TouchableOpacity style={[styles.microphoneButton]} onPress={startRecording}>
<Icon name="microphone" size={30} color="#FFF"/>
</TouchableOpacity>
{(recordingStatus === 'recording' || recordingStatus === 'paused') && (<View style={styles.playbackControls}>
<TouchableOpacity onPress={stopRecording} style={styles.controlButton}>
<Icon name="stop" size={30} color="#FFF"/>
<Text style={styles.controlText}>Opslaan</Text>
</TouchableOpacity>
<TouchableOpacity
onPress={recordingStatus === 'recording' ? pauseRecording : startRecording}
style={styles.controlButton}
>
<Icon name={recordingStatus === 'recording' ? "pause" : "play"} size={30} color="#FFF"/>
<Text
style={styles.controlText}>{recordingStatus === 'recording' ? "Pauzeren" : "Verder opnemen"}</Text>
</TouchableOpacity>
</View>)}
</View>);
};
/******************************DIT IS CSS VAN DE APP*******************************************************/
const styles = StyleSheet.create({
container: {
flex: 1, justifyContent: 'space-between',
}, headerContainer: {
marginTop: 50, alignItems: 'center',
}, header: {
fontSize: 32, fontWeight: 'bold',
}, button: {
backgroundColor: '#007bff', borderRadius: 20, padding: 10, marginVertical: 10, alignSelf: 'center',
}, buttonText: {
color: '#FFF', textAlign: 'center',
}, recordingsList: {
marginTop: 20, width: '100%',
}, recordingItem: {
flexDirection: 'row',
justifyContent: 'space-between',
alignItems: 'center',
paddingVertical: 10,
paddingHorizontal: 20,
borderBottomWidth: 1,
borderBottomColor: '#CCC',
}, recordingText: {
color: '#000',
}, microphoneButton: {
backgroundColor: '#ff4757', borderRadius: 50, padding: 20, position: 'absolute', right: 20, bottom: 20,
}, playbackControls: {
flexDirection: 'row', justifyContent: 'center', alignItems: 'center', marginTop: 20,
}, controlButton: {
backgroundColor: '#007bff', borderRadius: 50, padding: 20, marginHorizontal: 20, alignItems: 'center',
}, controlText: {
color: '#FFF', marginTop: 8,
},
});
export default App;
This is the error that I receive when I try to transcribe it:
Transcription result: {"error": {"code": null, "message": "1 validation error for Request
body -> file
field required (type=value_error.missing)", "param": null, "type": "invalid_request_error"}}
Also, I’ve tried to use react native file system. In this case I want to add the import:
import RNFetchBlob from "rn-fetch-blob";
I had the code for this but I ended up not using it since the import didn’t work.
The error code that I received was this:
Cannot read property 'DocumentDir' of null.
Perhaps it’s useless but if it’s better than my current function please let me know how to fix this.
TeamAlmelo is a new contributor to this site. Take care in asking for clarification, commenting, and answering.
Check out our Code of Conduct.