I’m trying to encode frames from an audio buffer to Opus data in a Swift iOS app and send them to a server, where they are decoded and transcribed by Whisper. The error I keep getting back from the server is opuslib.exceptions.OpusError: b'buffer too small'. I’ve also noticed that once the encoding happens on the iOS side, I see this log, which definitely looks too small: Encoded 8 bytes of data.
Below is the code for setting up the audio session and encoding the data:
class AudioSessionManager: ObservableObject {
    private var audioEngine: AVAudioEngine?
    private weak var appManager: AppManager?
    private var canSendData: Bool = false

    static let shared = AudioSessionManager(appManager: AppManager.shared)

    init(appManager: AppManager) {
        self.appManager = appManager
        setupAudioSession()
    }
    // MARK: - Audio Session Setup
    private func setupAudioSession() {
        let session = AVAudioSession.sharedInstance()
        do {
            try session.setPreferredSampleRate(48000)
            try session.setPreferredInputNumberOfChannels(1)
            try session.setCategory(.record, mode: .default, options: [])
            try session.setActive(true)
            print("Audio session setup complete with sample rate 48000 Hz and mono channel.")
        } catch {
            print("Failed to set up audio session: \(error)")
        }
    }
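    // Note: setPreferredSampleRate and setPreferredInputNumberOfChannels are
    // requests, not guarantees; the hardware may still run at e.g. 44.1 kHz,
    // which is why bufferAudioData below validates the buffer format.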
    // MARK: - Microphone Permission
    func checkMicrophonePermission() -> Bool {
        return AVAudioSession.sharedInstance().recordPermission == .granted
    }

    func requestMicrophoneAccess(completion: @escaping (Bool) -> Void) {
        let audioSession = AVAudioSession.sharedInstance()
        switch audioSession.recordPermission {
        case .granted:
            completion(true)
        case .denied:
            completion(false)
        case .undetermined:
            audioSession.requestRecordPermission { granted in
                DispatchQueue.main.async {
                    completion(granted)
                }
            }
        @unknown default:
            completion(false)
        }
    }
    // MARK: - Audio Engine Setup
    func setupAudioEngine() {
        audioEngine = AVAudioEngine()
        guard let audioEngine = audioEngine else {
            print("Audio engine could not be initialized")
            return
        }
        let inputNode = audioEngine.inputNode
        let mixerNode = AVAudioMixerNode()
        audioEngine.attach(mixerNode)
        // Valid Opus frame sizes at 48 kHz: 120, 240, 480, 960, 1920, 2880
        let opusCompatibleBufferSize: AVAudioFrameCount = 2880
        let desiredFormat = AVAudioFormat(standardFormatWithSampleRate: 48000, channels: 1)
        audioEngine.connect(inputNode, to: mixerNode, format: inputNode.inputFormat(forBus: 0))
        // installTap's bufferSize is only a request; the engine may deliver buffers
        // of a different length, so AudioStreamManager re-chunks into exact frames.
        mixerNode.installTap(onBus: 0, bufferSize: opusCompatibleBufferSize, format: desiredFormat) { [weak self] (buffer, when) in
            self?.bufferAudioData(buffer)
        }
    }
    private func bufferAudioData(_ buffer: AVAudioPCMBuffer) {
        if buffer.format.sampleRate != 48000 || buffer.format.channelCount != 1 {
            print("Buffer format mismatch: Expected 48000 Hz, 1 channel, but got \(buffer.format.sampleRate) Hz, \(buffer.format.channelCount) channels")
            return
        }
        AudioStreamManager.shared.canSendData = canSendData
        AudioStreamManager.shared.process(buffer)
    }
    // MARK: - Recording Control
    func startRecording() {
        print("Starting recording...")
        canSendData = true
        setupAudioEngine()
        WebSocketManager.shared.sendCommand("aq_start")
        do {
            try audioEngine?.start()
            print("Audio engine started.")
        } catch {
            print("Failed to start audio engine: \(error)")
        }
    }

    func stopRecording() {
        print("Stopping recording...")
        canSendData = false
        audioEngine?.stop()
        audioEngine = nil
        WebSocketManager.shared.sendCommand("aq_stop")
        print("Recording stopped.")
    }
}
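For reference, 2880 frames at 48 kHz is a 60 ms Opus frame, the largest frame duration Opus supports; this is the mapping I'm relying on when picking the tap's buffer size (the helper below is mine, just documenting the math):

func opusFrameDurationMs(_ frameCount: Int, sampleRate: Double = 48_000) -> Double {
    // 120 -> 2.5 ms, 240 -> 5 ms, 480 -> 10 ms,
    // 960 -> 20 ms, 1920 -> 40 ms, 2880 -> 60 ms
    Double(frameCount) / sampleRate * 1_000
}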
This is where I’m handling audio stream encoding and sending:
import Foundation
import AVFoundation
import Opus

class AudioStreamManager {
    static let shared = AudioStreamManager(appManager: AppManager.shared)
    var storage: ContiguousArray<Float> = []
    var opusEncoder: Opus.Encoder?
    var opusDecoder: Opus.Decoder?
    var canSendData: Bool = false
    var appManager: AppManager
    private let audioEngine = AVAudioEngine()
    private let audioPlayerNode = AVAudioPlayerNode()

    init(appManager: AppManager) {
        self.appManager = appManager
        setupOpusEncoder()
        setupOpusDecoder()
        setupAudioEngine()
    }
    // MARK: - Audio Processing
    func process(_ buffer: AVAudioPCMBuffer) {
        // Mono is assumed, so only channel 0 is read.
        guard let channelData = buffer.floatChannelData else { return }
        let samples = channelData[0]
        for i in 0..<Int(buffer.frameLength) {
            storage.append(samples[i])
        }
        checkOutput()
    }
    private func checkOutput() {
        let desiredSize = 2880
        while storage.count >= desiredSize {
            let outArray = ContiguousArray<Float>(storage.prefix(desiredSize))
            if let outBuffer = createPCMBuffer(from: outArray, sampleRate: 48000, channelCount: 1) {
                send(buffer: outBuffer)
            }
            storage.removeFirst(desiredSize)
        }
    }
    private func send(buffer: AVAudioPCMBuffer) {
        guard let encoder = opusEncoder else {
            print("Opus encoder not initialized")
            return
        }
        // 1500 bytes is the maximum packet size I allow the encoder to produce.
        var opusData = Data(count: 1500)
        do {
            let bytesEncoded = try encoder.encode(buffer, to: &opusData)
            print("Encoded \(bytesEncoded) bytes of data.")
            if !opusData.isEmpty && canSendData {
                WebSocketManager.shared.send(audioData: opusData, frameSize: UInt32(opusData.count)) {
                    print("Opus encoded audio data sent.")
                }
            }
        } catch let error as Opus.Error {
            print("Failed to encode audio: Opus Error \(error.rawValue) - \(interpretOpusError(error))")
        } catch {
            print("Failed to encode audio: \(error)")
        }
    }
    // MARK: - Opus Setup
    private func setupOpusEncoder() {
        let sampleRate = 48000.0
        let channels = 1
        guard let opusFormat = AVAudioFormat(opusPCMFormat: .float32, sampleRate: sampleRate, channels: AVAudioChannelCount(channels)) else {
            print("Invalid audio format parameters")
            return
        }
        do {
            opusEncoder = try Opus.Encoder(format: opusFormat)
            print("Opus encoder successfully created")
        } catch {
            print("Failed to create Opus encoder: \(error)")
        }
    }

    private func setupOpusDecoder() {
        let sampleRate = 48000.0
        let channels = 1
        guard let opusFormat = AVAudioFormat(opusPCMFormat: .float32, sampleRate: sampleRate, channels: AVAudioChannelCount(channels)) else {
            print("Invalid audio format parameters")
            return
        }
        do {
            opusDecoder = try Opus.Decoder(format: opusFormat)
            print("Opus decoder successfully created")
        } catch {
            print("Failed to create Opus decoder: \(error)")
        }
    }
    func decodeOpusData(_ opusData: Data) -> AVAudioPCMBuffer? {
        guard let opusDecoder = opusDecoder else {
            print("Opus decoder not initialized")
            return nil
        }
        // do {
        //     print("Attempting to decode Opus data of length: \(opusData.count)")
        //     let decodedAudioBuffer = try opusDecoder.decode(opusData)
        //     print("Successfully decoded Opus data")
        //     return decodedAudioBuffer
        // } catch let error as Opus.Error {
        //     print("Failed to decode audio: Opus Error \(error.rawValue) - \(interpretOpusError(error))")
        // } catch {
        //     print("Failed to decode audio: \(error)")
        // }
        return nil
    }
    private func interpretOpusError(_ error: Opus.Error) -> String {
        switch error {
        case .ok:
            return "No error."
        case .badArgument:
            return "One or more invalid/out of range arguments."
        case .bufferTooSmall:
            return "Not enough bytes allocated in the buffer."
        case .internalError:
            return "An internal error was detected."
        case .invalidPacket:
            return "The compressed data passed is corrupted."
        case .unimplemented:
            return "Invalid/unsupported request number."
        case .invalidState:
            return "An encoder or decoder structure is invalid or already freed."
        case .allocationFailure:
            return "Memory allocation has failed."
        default:
            return "Unknown error."
        }
    }
    // MARK: - Audio Engine Setup
    private func setupAudioEngine() {
        audioEngine.attach(audioPlayerNode)
        let format = AVAudioFormat(standardFormatWithSampleRate: 48000, channels: 1)!
        audioEngine.connect(audioPlayerNode, to: audioEngine.mainMixerNode, format: format)
        do {
            try audioEngine.start()
            print("Audio engine started")
        } catch {
            print("Failed to start audio engine: \(error)")
        }
    }

    func playDecodedAudio(_ buffer: AVAudioPCMBuffer) {
        audioPlayerNode.scheduleBuffer(buffer, completionHandler: nil)
        if !audioPlayerNode.isPlaying {
            audioPlayerNode.play()
        }
    }
}
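One thing I'm not sure about is whether encode(_:to:) shrinks opusData to the encoded length or leaves it at the full 1500 bytes, in which case I'd be sending roughly 1492 bytes of zero padding after every 8-byte packet. This is the defensive trim I have in mind (a sketch of my assumption, not something I've confirmed the library needs):

do {
    var opusData = Data(count: 1500)
    let bytesEncoded = try encoder.encode(buffer, to: &opusData)
    // If encode(_:to:) does not resize opusData, strip the trailing padding
    // so only the real packet goes over the wire.
    let packet = Data(opusData.prefix(bytesEncoded))
    WebSocketManager.shared.send(audioData: packet, frameSize: UInt32(packet.count)) {
        print("Opus encoded audio data sent.")
    }
} catch {
    print("Failed to encode audio: \(error)")
}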
// MARK: - Utility Function
func createPCMBuffer(from floatArray: ContiguousArray<Float>, sampleRate: Double, channelCount: AVAudioChannelCount) -> AVAudioPCMBuffer? {
    guard let format = AVAudioFormat(standardFormatWithSampleRate: sampleRate, channels: channelCount) else {
        print("Failed to create AVAudioFormat")
        return nil
    }
    let frameCount = AVAudioFrameCount(floatArray.count / Int(channelCount))
    guard let pcmBuffer = AVAudioPCMBuffer(pcmFormat: format, frameCapacity: frameCount) else {
        print("Failed to create AVAudioPCMBuffer")
        return nil
    }
    pcmBuffer.frameLength = frameCount
    // De-interleave: the standard format is non-interleaved, so each channel gets its own plane.
    let channels = UnsafeBufferPointer(start: pcmBuffer.floatChannelData, count: Int(channelCount))
    for channelIndex in 0..<Int(channelCount) {
        let channel = channels[channelIndex]
        let stride = Int(channelCount)
        for frameIndex in 0..<Int(frameCount) {
            channel[frameIndex] = floatArray[frameIndex * stride + channelIndex]
        }
    }
    return pcmBuffer
}
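For completeness, a quick standalone sanity check of the utility with a silent 60 ms mono chunk (nothing from the real pipeline):

let silent = ContiguousArray<Float>(repeating: 0, count: 2880)
if let buf = createPCMBuffer(from: silent, sampleRate: 48_000, channelCount: 1) {
    print(buf.frameLength)          // 2880
    print(buf.format.channelCount)  // 1
}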
Here’s where I’m handling the sending of the audio data via a WebSocket:
func send(audioData: Data, frameSize: UInt32, completion: @escaping () -> Void) {
    if isConnected {
        let framePrefix = "frame"
        var packetData = Data()
        packetData.append(framePrefix.data(using: .utf8)!)
        var frameSizeBytes = frameSize.littleEndian
        packetData.append(Data(bytes: &frameSizeBytes, count: MemoryLayout<UInt32>.size))
        packetData.append(audioData)
        socket?.write(data: packetData, completion: completion)
    } else {
        print("WebSocket is not connected")
    }
}
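So each WebSocket message is laid out as the ASCII prefix "frame", a 4-byte little-endian length, then the Opus packet. The receiving side is Python, but here is a Swift sketch of the parsing counterpart, just to document the layout I expect the server to follow (the function is hypothetical, not part of my code):

// Message layout: "frame" (5 bytes) | UInt32 LE payload length (4 bytes) | Opus packet
func parseFramePacket(_ message: Data) -> Data? {
    let bytes = [UInt8](message)
    let prefix = Array("frame".utf8)
    guard bytes.count >= prefix.count + 4, Array(bytes[0..<5]) == prefix else { return nil }
    let length = UInt32(bytes[5]) | (UInt32(bytes[6]) << 8)
        | (UInt32(bytes[7]) << 16) | (UInt32(bytes[8]) << 24)
    let payload = bytes.dropFirst(9)
    guard payload.count == Int(length) else { return nil }
    return Data(payload)
}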