I am recording audio and I want to add the current longitude and latitude at the time of recording to it. I pass the text to Google Text-to-Speech to get an audio file. The recorded audio is in PCM format. When I convert that PCM file to WAV and merge the TTS WAV file with the recorded (converted) WAV file, the TTS audio plays correctly but the recorded audio is distorted.
ttsWavFile = FileManager(requireContext()).getWavAudioFile()
// NOTE(review): AudioRecorder.saveAudio() also obtains its target via getWavAudioFile();
// confirm that each call returns a distinct file so the TTS output cannot overwrite the recording.
tts.setOnUtteranceProgressListener(object : UtteranceProgressListener() {
    override fun onStart(utteranceId: String?) {
        // Nothing to do when synthesis starts.
    }

    override fun onDone(utteranceId: String?) {
        // Invoked once the TTS engine has finished writing ttsWavFile. This callback is not
        // guaranteed to run on the main thread, which suits the file I/O done by the merge.
        mergeWavFiles(ttsWavFile, recorded, finalWavFile)
    }

    override fun onError(utteranceId: String?) {
        // Listener callbacks may arrive on a background thread, and a Toast must be created on
        // a thread with a Looper, so hop to the UI thread before showing it. The context is
        // looked up null-safely because the fragment may have been detached in the meantime.
        activity?.runOnUiThread {
            context?.let { ctx ->
                Toast.makeText(ctx, "Error in TTS synthesis", Toast.LENGTH_SHORT).show()
            }
        }
    }
})
val params = Bundle()
params.putInt(Engine.KEY_PARAM_STREAM, android.media.AudioManager.STREAM_MUSIC)
// Synthesizes `text` into ttsWavFile; completion or failure is reported to the listener above
// under the utterance id "tts1".
tts.synthesizeToFile(text, params, ttsWavFile, "tts1")
Here is the code that records the audio and converts the recorded PCM file to WAV:
/**
 * Records mono, 16-bit PCM audio from the microphone at 44.1 kHz into a raw PCM file and,
 * on [saveAudio], wraps that data in a 44-byte WAV header.
 *
 * While recording, the RMS amplitude of every captured buffer is posted to [waveformView]
 * on the main thread.
 *
 * @param context used for the permission check and to obtain files from `FileManager`.
 * @param waveformView receives amplitude updates through `updateWaveform`.
 * @param onFileConversion invoked with the finished WAV file at the end of [saveAudio].
 */
class AudioRecorder(
    private val context: Context,
    private val waveformView: WaveformView,
    private val onFileConversion: (File) -> Unit
) {
    private var audioRecord: AudioRecord? = null

    // Both flags are written by the caller's thread and read by the capture thread.
    @Volatile
    private var isRecording = false

    @Volatile
    private var isPaused = false

    private val bufferSize = AudioRecord.getMinBufferSize(
        SAMPLE_RATE,
        android.media.AudioFormat.CHANNEL_IN_MONO,
        android.media.AudioFormat.ENCODING_PCM_16BIT
    )
    private val handler = Handler(Looper.getMainLooper())
    private var outputStream: FileOutputStream? = null
    private var recordingThread: Thread? = null
    private lateinit var audioFile: File

    /**
     * Starts capturing audio on a background thread.
     *
     * Returns without doing anything when the RECORD_AUDIO permission is missing, when a
     * recording is already running, or when the audio input cannot be initialised.
     */
    fun startRecording() {
        if (ActivityCompat.checkSelfPermission(
                context,
                Manifest.permission.RECORD_AUDIO
            ) != PackageManager.PERMISSION_GRANTED
        ) {
            return
        }
        // A second start would leak the running AudioRecord and output stream.
        if (isRecording) return
        // getMinBufferSize reports failure with a non-positive value.
        if (bufferSize <= 0) return

        val recorder = AudioRecord(
            MediaRecorder.AudioSource.MIC,
            SAMPLE_RATE,
            android.media.AudioFormat.CHANNEL_IN_MONO,
            android.media.AudioFormat.ENCODING_PCM_16BIT,
            bufferSize
        )
        if (recorder.state != AudioRecord.STATE_INITIALIZED) {
            recorder.release()
            return
        }

        audioFile = FileManager(context).getPCMAudioFile()
        val stream = FileOutputStream(audioFile)
        outputStream = stream
        audioRecord = recorder

        recorder.startRecording()
        isRecording = true
        isPaused = false

        recordingThread = Thread {
            val buffer = ByteArray(bufferSize)
            while (isRecording) {
                if (isPaused) {
                    // Sleep instead of spinning at full speed while paused.
                    Thread.sleep(PAUSE_POLL_MS)
                    continue
                }
                val read = recorder.read(buffer, 0, buffer.size)
                // Negative values are AudioRecord error codes; stop capturing.
                if (read < 0) break
                if (read > 0) {
                    stream.write(buffer, 0, read)
                    // Measure only the bytes delivered by this read, not stale buffer content.
                    val amplitude = calculateRMS(buffer, read)
                    handler.post {
                        waveformView.updateWaveform(amplitude)
                    }
                }
            }
        }.also { it.start() }
    }

    /**
     * Computes the root-mean-square amplitude of little-endian 16-bit samples.
     *
     * @param buffer raw PCM bytes.
     * @param length number of valid bytes in [buffer]; a trailing odd byte is ignored.
     * @return the RMS of the samples, or 0 when there is no complete sample.
     */
    private fun calculateRMS(buffer: ByteArray, length: Int = buffer.size): Float {
        val sampleCount = minOf(length, buffer.size) / 2
        if (sampleCount <= 0) return 0f
        var sumOfSquares = 0.0
        for (index in 0 until sampleCount) {
            // The low byte must be masked: Byte.toInt() sign-extends, which would otherwise
            // overwrite the high byte whenever the low byte is >= 0x80.
            val low = buffer[2 * index].toInt() and 0xFF
            val high = buffer[2 * index + 1].toInt()
            val sample = (high shl 8) or low
            sumOfSquares += sample.toDouble() * sample
        }
        return sqrt(sumOfSquares / sampleCount).toFloat()
    }

    /** Suspends capturing; the capture thread stays alive until [stopRecording]. */
    fun pauseRecording() {
        isPaused = true
    }

    /** Resumes capturing after [pauseRecording]. */
    fun resumeRecording() {
        isPaused = false
    }

    /**
     * Stops capturing and releases the audio input.
     *
     * Waits briefly for the capture thread to finish so the input is not released, and the
     * output stream is not closed, while a read or write is still in progress.
     */
    fun stopRecording() {
        isRecording = false
        isPaused = false
        val recorder = audioRecord
        audioRecord = null
        recorder?.stop()
        recordingThread?.join(THREAD_JOIN_TIMEOUT_MS)
        recordingThread = null
        recorder?.release()
    }

    /**
     * Closes the PCM file, converts it to WAV and passes the result to [onFileConversion].
     * An ongoing recording is stopped first.
     */
    fun saveAudio() {
        if (isRecording) stopRecording()
        outputStream?.close()
        outputStream = null
        val pcmFile = audioFile
        val wavFile = FileManager(context).getWavAudioFile()
        // The header must describe the data as captured: mono, not stereo.
        val file = convertPcmToWav(pcmFile, wavFile, SAMPLE_RATE, CHANNEL_COUNT, BITS_PER_SAMPLE)
        onFileConversion(file)
    }

    /**
     * Writes [wavFile] as a WAV header followed by the content of [pcmFile], then deletes
     * [pcmFile].
     *
     * The PCM data is streamed so a long recording is never held in memory at once.
     *
     * @return [wavFile].
     */
    private fun convertPcmToWav(
        pcmFile: File,
        wavFile: File,
        sampleRate: Int,
        channels: Int,
        bitDepth: Int
    ): File {
        val pcmSize = pcmFile.length().toInt()
        val wavHeader = createWavHeader(pcmSize, sampleRate, channels, bitDepth)
        FileOutputStream(wavFile).use { fos ->
            fos.write(wavHeader)
            pcmFile.inputStream().use { pcm -> pcm.copyTo(fos) }
        }
        pcmFile.delete()
        return wavFile
    }

    /**
     * Builds the canonical 44-byte RIFF/WAVE header for uncompressed PCM data.
     *
     * @param dataSize size of the PCM payload in bytes.
     * @param sampleRate samples per second.
     * @param channels number of interleaved channels.
     * @param bitDepth bits per sample.
     */
    private fun createWavHeader(
        dataSize: Int,
        sampleRate: Int,
        channels: Int,
        bitDepth: Int
    ): ByteArray {
        val totalDataLen = dataSize + 36
        val byteRate = sampleRate * channels * bitDepth / 8
        val header = ByteArray(44)

        fun putTag(offset: Int, tag: String) {
            tag.forEachIndexed { i, c -> header[offset + i] = c.code.toByte() }
        }

        // All multi-byte WAV fields are little-endian.
        fun putInt(offset: Int, value: Int) {
            for (i in 0..3) header[offset + i] = (value shr (8 * i)).toByte()
        }

        fun putShort(offset: Int, value: Int) {
            header[offset] = value.toByte()
            header[offset + 1] = (value shr 8).toByte()
        }

        // RIFF/WAVE header
        putTag(0, "RIFF")
        putInt(4, totalDataLen)
        putTag(8, "WAVE")
        // fmt subchunk
        putTag(12, "fmt ")
        putInt(16, 16) // Subchunk1 size (16 for PCM)
        putShort(20, 1) // Audio format (1 for PCM)
        putShort(22, channels)
        putInt(24, sampleRate)
        putInt(28, byteRate)
        putShort(32, channels * bitDepth / 8) // Block align
        putShort(34, bitDepth)
        // data subchunk
        putTag(36, "data")
        putInt(40, dataSize)
        return header
    }

    private companion object {
        const val SAMPLE_RATE = 44100
        const val CHANNEL_COUNT = 1 // matches CHANNEL_IN_MONO
        const val BITS_PER_SAMPLE = 16 // matches ENCODING_PCM_16BIT
        const val PAUSE_POLL_MS = 10L
        const val THREAD_JOIN_TIMEOUT_MS = 500L
    }
}
Now, when I merge these two audio files, the first one (the text-to-speech audio) comes out fine, but the second one (the recorded audio) is distorted. Below is the code that merges the two WAV files:
/**
 * Concatenates the audio data of [wavFile1] and [wavFile2] into [outputFile].
 *
 * The 44-byte header of [wavFile1] is reused for the result and its size fields are fixed up
 * afterwards by `updateWavHeader`. The audio data is copied verbatim, so both inputs must use
 * the same sample rate, channel count and bit depth; if they differ, the second file plays
 * back distorted. Such a mismatch is logged but the merge still proceeds.
 *
 * On success both input files are deleted and the dialog is dismissed. An [IOException] is
 * logged and leaves the inputs untouched.
 *
 * Assumes both files start with a canonical 44-byte header with no extra chunks.
 */
fun mergeWavFiles(wavFile1: File, wavFile2: File, outputFile: File) {
    // Fills `target` completely; returns false if the stream ends first.
    fun readFully(input: FileInputStream, target: ByteArray): Boolean {
        var filled = 0
        while (filled < target.size) {
            val count = input.read(target, filled, target.size - filled)
            if (count == -1) return false
            filled += count
        }
        return true
    }

    try {
        // `use` closes every stream even when reading or writing throws.
        FileInputStream(wavFile1).use { inputStream1 ->
            FileInputStream(wavFile2).use { inputStream2 ->
                val header1 = ByteArray(44)
                val header2 = ByteArray(44)
                if (!readFully(inputStream1, header1) || !readFully(inputStream2, header2)) {
                    throw IOException("WAV file is shorter than its 44-byte header")
                }

                // Bytes 20..35 hold audio format, channels, sample rate, byte rate, block align
                // and bits per sample. Only header1 ends up in the output, so any difference
                // here means the second file's samples will be misinterpreted on playback.
                val format1 = header1.copyOfRange(20, 36)
                val format2 = header2.copyOfRange(20, 36)
                if (!format1.contentEquals(format2)) {
                    println(
                        "Warning: WAV formats differ; audio from ${wavFile2.name} " +
                            "will sound distorted unless it is resampled to match ${wavFile1.name}"
                    )
                }

                FileOutputStream(outputFile).use { outputStream ->
                    outputStream.write(header1)
                    // Each stream is positioned just past its header, so only audio data is copied.
                    inputStream1.copyTo(outputStream)
                    inputStream2.copyTo(outputStream)
                }
            }
        }
        // Update the WAV header for the new file
        updateWavHeader(outputFile)
        wavFile1.delete()
        wavFile2.delete()
        dialog.dismiss()
    } catch (e: IOException) {
        e.printStackTrace()
        println("Error merging WAV files: ${e.message}")
    }
}
/**
 * Reads the sample rate, channel count and bit depth from the first 44 bytes of [wavFile].
 *
 * The fields are taken from fixed offsets (24, 22 and 34), which assumes the `fmt ` chunk
 * directly follows the RIFF header.
 *
 * @return `Triple(sampleRate, channels, bitDepth)`, or null when the file cannot be read or
 * is shorter than 44 bytes.
 */
fun checkWavProperties(wavFile: File): Triple<Int, Int, Int>? {
    try {
        FileInputStream(wavFile).use { fis ->
            val header = ByteArray(44)
            // A single read() may return fewer bytes than requested, so loop until the header
            // is complete and give up if the file ends first.
            var filled = 0
            while (filled < header.size) {
                val count = fis.read(header, filled, header.size - filled)
                if (count == -1) return null
                filled += count
            }

            // Byte.toInt() sign-extends, so mask every byte to its unsigned value.
            fun unsignedAt(index: Int): Int = header[index].toInt() and 0xFF

            // All fields are little-endian.
            val sampleRate = (unsignedAt(27) shl 24) or
                (unsignedAt(26) shl 16) or
                (unsignedAt(25) shl 8) or
                unsignedAt(24)
            val channels = (unsignedAt(23) shl 8) or unsignedAt(22)
            val bitDepth = (unsignedAt(35) shl 8) or unsignedAt(34)
            return Triple(sampleRate, channels, bitDepth)
        }
    } catch (e: IOException) {
        e.printStackTrace()
    }
    return null
}
/**
 * Rewrites the two size fields of a 44-byte WAV header so they match the file's real length:
 * ChunkSize (file size - 8) at offset 4 and Subchunk2Size (file size - 44) at offset 40.
 *
 * Files shorter than 44 bytes are left untouched. An [IOException] is logged, not rethrown.
 * Sizes are written as 32-bit values, so files of 2 GiB or more are not represented correctly.
 */
fun updateWavHeader(wavFile: File) {
    try {
        // `use` closes the file even when a seek or write fails.
        RandomAccessFile(wavFile, "rw").use { fileAccess ->
            val fileSize = fileAccess.length()
            // Without a complete header the sizes would be negative, and seeking to offset 40
            // would extend the file with garbage.
            if (fileSize < 44) return
            val dataSize = fileSize - 44
            // Update ChunkSize (file size - 8) at offset 4
            fileAccess.seek(4)
            fileAccess.write(intToLittleEndian((fileSize - 8).toInt()))
            // Update Subchunk2Size (data size) at offset 40
            fileAccess.seek(40)
            fileAccess.write(intToLittleEndian(dataSize.toInt()))
        }
    } catch (e: IOException) {
        e.printStackTrace()
    }
}
/**
 * Encodes [value] as four bytes in little-endian order (least significant byte first).
 */
fun intToLittleEndian(value: Int): ByteArray =
    // toByte() keeps only the low 8 bits, so each shift selects the next byte of the value.
    ByteArray(4) { position -> (value shr (position * 8)).toByte() }
3