AVAudioConverter Opus

Question

I try to build VoIP application on iOS with echo cancellation. For AEC as I understand I need to use Audio Units. The main problem is how to use AVAudioConverter to encode microphone data to Opus?

opusASBD = AudioStreamBasicDescription(mSampleRate: 48000.0,
                                                    mFormatID: kAudioFormatOpus,
                                                    mFormatFlags: 0,
                                                    mBytesPerPacket: 0,
                                                    mFramesPerPacket: 2880,
                                                    mBytesPerFrame: 0,
                                                    mChannelsPerFrame: 1,
                                                    mBitsPerChannel: 0,
                                                    mReserved: 0)

        decoderOutputASBD = AudioStreamBasicDescription(mSampleRate: 48000.0,
                                                             mFormatID: kAudioFormatLinearPCM,
                                                             mFormatFlags: kLinearPCMFormatFlagIsSignedInteger | kAudioFormatFlagIsPacked | kAudioFormatFlagIsNonInterleaved,
                                                             mBytesPerPacket: 2,
                                                             mFramesPerPacket: 1,
                                                             mBytesPerFrame: 2,
                                                             mChannelsPerFrame: 1,
                                                             mBitsPerChannel: 16,
                                                             mReserved: 0)

        self.converterSpeaker = AVAudioConverter(from: AVAudioFormat(streamDescription: &opusASBD)!,
                                                 to: AVAudioFormat(streamDescription: &decoderOutputASBD)!)

        self.converterMic = AVAudioConverter(from: AVAudioFormat(streamDescription: &decoderOutputASBD)!,
                                             to: AVAudioFormat(streamDescription: &opusASBD)!)
        self.converterMic?.bitRate = 48000

var inDesc = AudioComponentDescription(componentType: kAudioUnitType_Output,
                                               componentSubType: kAudioUnitSubType_VoiceProcessingIO,
                                               componentManufacturer: kAudioUnitManufacturer_Apple,
                                               componentFlags: 0,
                                               componentFlagsMask: 0)
        if let inputComponent = AudioComponentFindNext(nil, &inDesc) {
            let status = AudioComponentInstanceNew(inputComponent, &self.audioUnit)
            if status == noErr {
                var flag = UInt32(1)

                AudioUnitSetProperty(self.audioUnit,
                                     kAudioOutputUnitProperty_EnableIO,
                                     kAudioUnitScope_Input,
                                     1,
                                     &flag,
                                     UInt32(MemoryLayout<UInt32>.size))

                AudioUnitSetProperty(self.audioUnit,
                                     kAudioOutputUnitProperty_EnableIO,
                                     kAudioUnitScope_Output,
                                     0,
                                     &flag,
                                     UInt32(MemoryLayout<UInt32>.size))

                AudioUnitSetProperty(self.audioUnit,
                                     kAUVoiceIOProperty_VoiceProcessingEnableAGC,
                                     kAudioUnitScope_Global,
                                     0,
                                     &flag,
                                     UInt32(MemoryLayout<UInt32>.size))

                AudioUnitSetProperty(self.audioUnit,
                                     kAudioUnitProperty_StreamFormat,
                                     kAudioUnitScope_Input,
                                     0,
                                     &decoderOutputASBD,
                                     UInt32(MemoryLayout<AudioStreamBasicDescription>.size))

                AudioUnitSetProperty(self.audioUnit,
                                     kAudioUnitProperty_StreamFormat,
                                     kAudioUnitScope_Output,
                                     1,
                                     &decoderOutputASBD,
                                     UInt32(MemoryLayout<AudioStreamBasicDescription>.size))

                var iCallback = AURenderCallbackStruct(inputProc: inputCallback,
                                                       inputProcRefCon: UnsafeMutableRawPointer(Unmanaged.passUnretained(self).toOpaque()))
                AudioUnitSetProperty(self.audioUnit,
                                     kAudioOutputUnitProperty_SetInputCallback,
                                     kAudioUnitScope_Global,
                                     1,
                                     &iCallback,
                                     UInt32(MemoryLayout<AURenderCallbackStruct>.size))

                var rCallback = AURenderCallbackStruct(inputProc: renderCallback,
                                                       inputProcRefCon: UnsafeMutableRawPointer(Unmanaged.passUnretained(self).toOpaque()))
                AudioUnitSetProperty(self.audioUnit,
                                     kAudioUnitProperty_SetRenderCallback,
                                     kAudioUnitScope_Global,
                                     0,
                                     &rCallback,
                                     UInt32(MemoryLayout<AURenderCallbackStruct>.size))

                AudioUnitInitialize(self.audioUnit)
                AudioOutputUnitStart(self.audioUnit)
}

I'm using ring buffer for audio data from https://github.com/michaeltyson/TPCircularBuffer

func inputCallback(_ inRefCon: UnsafeMutableRawPointer,
                               _ ioActionFlags: UnsafeMutablePointer<AudioUnitRenderActionFlags>,
                               _ inTimeStamp: UnsafePointer<AudioTimeStamp>,
                               _ inOutputBusNumber: UInt32,
                               _ inNumberFrames: UInt32,
                               _ ioData: UnsafeMutablePointer<AudioBufferList>?) -> OSStatus {
    let wSelf: AudioUnits = Unmanaged.fromOpaque(inRefCon).takeUnretainedValue()
    var buffers = AudioBufferList(mNumberBuffers: 1, mBuffers: AudioBuffer())

    AudioUnitRender(wSelf.audioUnit,
                    ioActionFlags,
                    inTimeStamp,
                    inOutputBusNumber,
                    inNumberFrames,
                    &buffers)

    TPCircularBufferCopyAudioBufferList(&wSelf.ringBufferMic,
                                        &buffers,
                                        inTimeStamp,
                                        inNumberFrames,
                                        &wSelf.decoderOutputASBD)

    wSelf.handleMic(inNumberFrames, inTimeStamp: inTimeStamp.pointee)

    return noErr
}

func renderCallback(_ inRefCon: UnsafeMutableRawPointer,
                                _ ioActionFlags: UnsafeMutablePointer<AudioUnitRenderActionFlags>,
                                _ inTimeStamp: UnsafePointer<AudioTimeStamp>,
                                _ inOutputBusNumber: UInt32,
                                _ inNumberFrames: UInt32,
                                _ ioData: UnsafeMutablePointer<AudioBufferList>?) -> OSStatus {
    let wSelf: AudioUnits = Unmanaged.fromOpaque(inRefCon).takeUnretainedValue()

    if let data = ioData {
        let audioBufferList = UnsafeMutableAudioBufferListPointer(data)

        if let buffer = audioBufferList.first {
            buffer.mData?.assumingMemoryBound(to: Float32.self).assign(repeating: 0, count: Int(inNumberFrames))
        }

        var ioLengthInFrames = inNumberFrames
        TPCircularBufferDequeueBufferListFrames(&wSelf.ringBufferSpeaker,
                                                &ioLengthInFrames,
                                                ioData!,
                                                nil,
                                                &wSelf.decoderOutputASBD)
    }

    return noErr
}

In microphone handler I just encoding to Opus then decoding and trying to render decoded audio data (DEBUG). But my voice is corrupted

func handleMic(_ frames: UInt32, inTimeStamp: AudioTimeStamp) {
        var ioLengthInFrames = frames
        var its = inTimeStamp

        self.inputBufferMic = AVAudioPCMBuffer(pcmFormat: AVAudioFormat(streamDescription: &self.decoderOutputASBD)!,
                                               frameCapacity: ioLengthInFrames)!
        self.inputBufferMic.frameLength = self.inputBufferMic.frameCapacity

        TPCircularBufferDequeueBufferListFrames(&self.ringBufferMic,
                                                &ioLengthInFrames,
                                                self.inputBufferMic.mutableAudioBufferList,
                                                &its,
                                                &self.decoderOutputASBD)

        self.outputBufferMic = AVAudioCompressedBuffer(format: AVAudioFormat(streamDescription: &self.opusASBD)!,
                                                       packetCapacity: 1,
                                                       maximumPacketSize: 960)

        var error: NSError?
        self.converterMic?.convert(to: self.outputBufferMic,
                                   error: &error,
                                   withInputFrom: { [weak self] (packetCount, outputStatus) -> AVAudioBuffer? in
                                    outputStatus.pointee = .haveData

                                    return self?.inputBufferMic
        })
        if let e = error {
            LoggerManager.sharedInstance.log("<AudioUnits>: OPUS encoding error:\n \(e)")

            return
        }

        let mData = NSData(bytes: self.outputBufferMic.data,
                            length: Int(self.outputBufferMic.byteLength))

        self.inputBufferSpeaker = AVAudioCompressedBuffer(format: AVAudioFormat(streamDescription: &self.opusASBD)!,
                                                          packetCapacity: 1,
                                                          maximumPacketSize: Int(AudioUnits.frameSize))

        self.outputBufferSpeaker = AVAudioPCMBuffer(pcmFormat: AVAudioFormat(streamDescription: &self.decoderOutputASBD)!,
                                                    frameCapacity: AVAudioFrameCount(AudioUnits.frameSize))!
        self.outputBufferSpeaker.frameLength = self.outputBufferSpeaker.frameCapacity

        memcpy(self.inputBufferSpeaker.data, mData.bytes.bindMemory(to: UInt8.self, capacity: 1), mData.length)
        self.inputBufferSpeaker.byteLength = UInt32(mData.length)
        self.inputBufferSpeaker.packetCount = AVAudioPacketCount(1)
        self.inputBufferSpeaker.packetDescriptions![0].mDataByteSize = self.inputBufferSpeaker.byteLength

        self.converterSpeaker?.convert(to: self.outputBufferSpeaker,
                                       error: &error,
                                       withInputFrom: { [weak self] (packetCount, outputStatus) -> AVAudioBuffer? in
                                        outputStatus.pointee = .haveData

                                        return self?.inputBufferSpeaker
        })
        if let e = error {
            LoggerManager.sharedInstance.log("<AudioUnits>: OPUS decoding error:\n \(e)")

            return
        }

        TPCircularBufferCopyAudioBufferList(&self.ringBufferSpeaker,
                                            &self.outputBufferSpeaker.mutableAudioBufferList.pointee,
                                            nil,
                                            AudioUnits.frameSize,
                                            &self.decoderOutputASBD)
    }

Were you able to achieve the conversion to OPUS and filter out the echo? How was your approach? — bobski, Jul 31 '20 at 12:21

AVAudioConverter Opus

0 Answers0