3

I am trying to convert one AVAudioPCMBuffer (44.1 kHz, 1 ch, float32, non-interleaved) to another AVAudioPCMBuffer (16 kHz, 1 ch, int16, non-interleaved) using AVAudioConverter, and write it out with AVAudioFile. My code uses the AudioKit library together with the AKLazyTap tap to fetch a buffer at a fixed polling interval, based on this source:

https://github.com/AudioKit/AudioKit/tree/master/AudioKit/Common/Taps/Lazy%20Tap

Here is my implementation:

/// Target format for the converted audio: 16 kHz, mono, 16-bit integer PCM,
/// non-interleaved. Built once, on first access.
lazy var downAudioFormat: AVAudioFormat = {
  // kAudioChannelLayoutTag_Mono is a known-good layout tag, so the
  // force-unwrap cannot fail here.
  let monoLayout = AVAudioChannelLayout(layoutTag: kAudioChannelLayoutTag_Mono)!
  let format = AVAudioFormat(commonFormat: .pcmFormatInt16,
                             sampleRate: 16000,
                             interleaved: false,
                             channelLayout: monoLayout)
  return format
}()

// Configure AudioKit before creating any nodes: mono capture at 44.1 kHz
// with a very small I/O buffer (2 ms) so the tap can be polled frequently.
AKSettings.sampleRate = 44100
AKSettings.numberOfChannels = AVAudioChannelCount(1)
AKSettings.ioBufferDuration = 0.002
AKSettings.defaultToSpeaker = true

//...
let mic = AKMicrophone()
let originalAudioFormat: AVAudioFormat = mic.avAudioNode.outputFormat(forBus: 0) //44,100 Hz, 1ch, float32, non-interleaved
let inputFrameCapacity = AVAudioFrameCount(1024)
//NOTE(review): 512 is not derived from anything — the correct output capacity
//is the input frame count scaled by the rate ratio (16000/44100 ≈ 0.363, so
//~372 frames for 1024 input frames). Since fillNextBuffer's frameLength varies
//per poll, the capacity should be computed from the actual input length.
let outputFrameCapacity = AVAudioFrameCount(512)

// One reusable input buffer in the microphone's native format; the tap
// fills it in place on every timer tick.
guard let inputBuffer = AVAudioPCMBuffer(
  pcmFormat: originalAudioFormat,
  frameCapacity: inputFrameCapacity) else {
  fatalError()
}

// Poll at least twice per I/O buffer duration so no tap data is dropped.
bufferTimer = Timer.scheduledTimer(
  withTimeInterval: AKSettings.ioBufferDuration/2,
  repeats: true) { [weak self] _ in

  // Weak-capture self so the timer does not keep this object alive.
  guard let unwrappedSelf = self else {
    return
  }

  unwrappedSelf.lazyTap?.fillNextBuffer(inputBuffer, timeStamp: nil)

  // Because we poll rather than get pushed samples, the buffer is sometimes
  // empty and sometimes holds up to double the previous call's frames —
  // skip the conversion entirely when nothing arrived.
  if inputBuffer.frameLength == 0 {
    return
  }

  //Illustrative only: in the real code the converter is created once and
  //reused (this closure calls a factory function instead).
  //NOTE(review): AVAudioConverter(from:to:) returns an optional, and
  //sampleRateConverterAlgorithm expects a String constant — as written this
  //snippet would need unwrapping/adjustment to compile; confirm against the
  //real implementation.
  let converter = AVAudioConverter(from: originalAudioFormat, to: downAudioFormat)
  converter.sampleRateConverterAlgorithm = AVSampleRateConverterAlgorithm_Normal
  converter.sampleRateConverterQuality = .min
  converter.bitRateStrategy = AVAudioBitRateStrategy_Constant

  // Destination buffer in the 16 kHz int16 target format; see the capacity
  // note above — this fixed 512 is what causes the chopped audio.
  guard let outputBuffer = AVAudioPCMBuffer(
      pcmFormat: converter.outputFormat,
      frameCapacity: outputFrameCapacity) else {
    print("Failed to create new buffer")
    return
  }

  // Supplies the freshly-filled input buffer whenever the converter asks
  // for more data.
  let inputBlock: AVAudioConverterInputBlock = { inNumPackets, outStatus in
    outStatus.pointee = AVAudioConverterInputStatus.haveData
    return inputBuffer
  }

  var error: NSError?
  let status: AVAudioConverterOutputStatus = converter.convert(
      to: outputBuffer,
      error: &error,
      withInputFrom: inputBlock)

  // Bail out (after logging) if the conversion reported an error.
  switch status {
  case .error:
    if let unwrappedError: NSError = error {
      print(unwrappedError)
    }
    return
  default: break
  }

  //Illustrative only: the real code creates the AVAudioFile once and checks
  //for its existence before writing.
  //NOTE(review): `format`, `unwrappedCacheFilePath`, and the bare `try`
  //outside a do/catch are placeholders from the simplification.
  outputAVAudioFile = try AVAudioFile(
    forWriting: unwrappedCacheFilePath,
    settings: format.settings,
    commonFormat: format.commonFormat,
    interleaved: false)

  do {
    // NOTE(review): `avAudioPCMBuffer` is undefined here — presumably this
    // should write `outputBuffer`; confirm against the real code.
    try outputAVAudioFile?.write(from: avAudioPCMBuffer)
  } catch {
    print(error)
  }

}

(Please note that the AVAudioConverter and AVAudioFile are reused; the initialization shown above does not reflect my real implementation — it is only simplified to make the code easier to understand.)

With frameCapacity on the outputBuffer: AVAudioPCMBuffer set to 512, the audio gets chopped. Is there any way to discover the correct frameCapacity for this buffer?

Written using Swift 4 and AudioKit 4.1.

Many thanks!

Pedro Paulo Amorim
  • 1,838
  • 2
  • 27
  • 50

1 Answer

3

I managed to solve this problem installing a Tap on the inputNode like this:

/// Destination format for the tap output: SAMPLE_RATE Hz, mono, 16-bit
/// integer PCM, interleaved. Constructed lazily, once.
lazy var downAudioFormat: AVAudioFormat = {
  // The mono layout tag is always valid, so this unwrap is safe.
  let monoLayout = AVAudioChannelLayout(layoutTag: kAudioChannelLayoutTag_Mono)!
  return AVAudioFormat(commonFormat: .pcmFormatInt16,
                       sampleRate: SAMPLE_RATE,
                       interleaved: true,
                       channelLayout: monoLayout)
}()

/// Installs a tap on `avAudioNode` (bus 0) that converts each captured
/// buffer to `downAudioFormat` and hands the result to the delegate.
///
/// The output buffer capacity is derived from the sample-rate ratio between
/// the node's input format and the target format, so no frames are dropped
/// by an undersized buffer.
///
/// - Parameter avAudioNode: The node whose input should be tapped.
private func addBufferListener(_ avAudioNode: AVAudioNode) {

  let originalAudioFormat: AVAudioFormat = avAudioNode.inputFormat(forBus: 0)
  let downSampleRate: Double = downAudioFormat.sampleRate
  // Input-to-output sample-rate ratio, e.g. 44100 / 16000 ≈ 2.756.
  let ratio: Float = Float(originalAudioFormat.sampleRate)/Float(downSampleRate)
  // One converter for the lifetime of the tap — created here, reused per buffer.
  let converter: AVAudioConverter = buildConverter(originalAudioFormat)

  avAudioNode.installTap(
      onBus: 0,
      bufferSize: AVAudioFrameCount(downSampleRate * 2),
      format: originalAudioFormat,
      // The tap block delivers non-optional parameters; no IUO types needed.
      block: { (buffer: AVAudioPCMBuffer, _: AVAudioTime) -> Void in

        // Scale capacity by the rate ratio so every input frame fits after
        // downsampling (44.1 kHz -> 16 kHz shrinks the frame count).
        let capacity = UInt32(Float(buffer.frameCapacity)/ratio)

        guard let outputBuffer = AVAudioPCMBuffer(
            pcmFormat: self.downAudioFormat,
            frameCapacity: capacity) else {
          print("Failed to create new buffer")
          return
        }

        // Supplies the captured buffer whenever the converter pulls input.
        let inputBlock: AVAudioConverterInputBlock = { inNumPackets, outStatus in
          outStatus.pointee = AVAudioConverterInputStatus.haveData
          return buffer
        }

        var error: NSError?
        let status: AVAudioConverterOutputStatus = converter.convert(
            to: outputBuffer,
            error: &error,
            withInputFrom: inputBlock)

        switch status {
        case .error:
          // Fixed: the original had an unbalanced ")" after this print call,
          // which did not compile.
          if let unwrappedError: NSError = error {
            print("Error \(unwrappedError)")
          }
          return
        default: break
        }

        self.delegate?.flushAudioBuffer(outputBuffer)

  })

}
Pedro Paulo Amorim
  • 1,838
  • 2
  • 27
  • 50