My code can successfully extract all the NAL units of an H.264 stream that is packed into an AVI file. I can also parse the SPS, the PPS and the NAL unit types 1 and 5. I then extract a whole GOP (group of pictures), starting with the SPS and PPS, followed by the IDR NAL unit, and ending with the last non-IDR NAL unit before the next SPS.
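(Classifying the units is straightforward: the NAL unit type is the low five bits of the first byte after the start code. A minimal sketch of that check, not my actual parser, just to illustrate:)

// NAL header layout: forbidden_zero_bit (1) | nal_ref_idc (2) | nal_unit_type (5)
func nalUnitType(of nalu: [UInt8]) -> UInt8 {
    // `nalu` is assumed to start right after the Annex B start code
    return nalu[0] & 0x1F   // 1 = non-IDR slice, 5 = IDR slice, 7 = SPS, 8 = PPS
}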
I then reorder the NAL units according to section 8.2 of the specification to get the correct PicOrderCnt (which I pass on as the presentation timestamp, PTS).
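For reference, the PicOrderCnt values in the log below come from the pic_order_cnt_type == 0 derivation in section 8.2.1.1. A simplified sketch of that calculation (ignoring MMCO and field pictures; MaxPicOrderCntLsb is derived from log2_max_pic_order_cnt_lsb_minus4 in the SPS):

struct PocCounter {
    var prevMsb = 0
    var prevLsb = 0

    // Returns PicOrderCnt for a frame, given its pic_order_cnt_lsb from the slice header.
    mutating func picOrderCnt(lsb: Int, maxLsb: Int, isIDR: Bool) -> Int {
        if isIDR {                                  // IDR resets the counter
            prevMsb = 0
            prevLsb = 0
        }
        let msb: Int
        if lsb < prevLsb, prevLsb - lsb >= maxLsb / 2 {
            msb = prevMsb + maxLsb                  // lsb wrapped around upwards
        } else if lsb > prevLsb, lsb - prevLsb > maxLsb / 2 {
            msb = prevMsb - maxLsb                  // lsb wrapped around downwards
        } else {
            msb = prevMsb
        }
        prevMsb = msb                               // simplification: the spec only updates these
        prevLsb = lsb                               // from the previous *reference* picture
        return msb + lsb
    }
}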
So for each GOP I have an array of NAL units and a second array with the corresponding PTS values.
I then change the start codes to the AVCC format (a 4-byte length prefix per NAL unit). After that, I pass the SPS and PPS (without their start codes) to CMVideoFormatDescriptionCreateFromH264ParameterSets. I then put all the NAL units (a [UInt8] array) into a CMSampleBuffer, using the PicOrderCnt as the presentation timestamp in the CMSampleTimingInfo. My code then successfully decodes the video frames with VTDecompressionSessionDecodeFrame.

Unfortunately, some GOPs don't work: for some frames I get the error kVTVideoDecoderBadDataErr and I can't explain why.
For example: my current group starts at DTS (decoding timestamp) 770, which is a keyframe. This is my debug output:
DTS: 770 | PTS: 771 | NAL-Type 5: frame_num: 0 // slice_type: 7 // pic_order_cnt_lsb: 0 | PicOrderCnt: 0
DTS: 771 | PTS: 773 | NAL-Type 1: frame_num: 1 // slice_type: 5 // pic_order_cnt_lsb: 4 | PicOrderCnt: 4
DTS: 772 | PTS: 772 | NAL-Type 1: frame_num: 2 // slice_type: 6 // pic_order_cnt_lsb: 2 | PicOrderCnt: 2
DTS: 773 | PTS: 776 | NAL-Type 1: frame_num: 2 // slice_type: 5 // pic_order_cnt_lsb: 10 | PicOrderCnt: 10
DTS: 774 | PTS: 775 | NAL-Type 1: frame_num: 3 // slice_type: 6 // pic_order_cnt_lsb: 8 | PicOrderCnt: 8
DTS: 775 | PTS: 774 | NAL-Type 1: frame_num: 4 // slice_type: 6 // pic_order_cnt_lsb: 6 | PicOrderCnt: 6
DTS: 776 | PTS: 779 | NAL-Type 1: frame_num: 4 // slice_type: 5 // pic_order_cnt_lsb: 16 | PicOrderCnt: 16
...
DTS: 815 | PTS: 818 | NAL-Type 1: frame_num: 14 // slice_type: 5 // pic_order_cnt_lsb: 30 | PicOrderCnt: 94
DTS: 816 | PTS: 817 | NAL-Type 1: frame_num: 15 // slice_type: 6 // pic_order_cnt_lsb: 28 | PicOrderCnt: 92
DTS: 817 | PTS: 816 | NAL-Type 1: frame_num: 0 // slice_type: 6 // pic_order_cnt_lsb: 26 | PicOrderCnt: 90
DTS: 818 | PTS: 821 | NAL-Type 1: frame_num: 0 // slice_type: 5 // pic_order_cnt_lsb: 36 | PicOrderCnt: 100
DTS: 819 | PTS: 820 | NAL-Type 1: frame_num: 1 // slice_type: 6 // pic_order_cnt_lsb: 34 | PicOrderCnt: 98
DTS: 820 | PTS: 819 | NAL-Type 1: frame_num: 2 // slice_type: 6 // pic_order_cnt_lsb: 32 | PicOrderCnt: 96
DTS: 821 | PTS: 824 | NAL-Type 1: frame_num: 2 // slice_type: 5 // pic_order_cnt_lsb: 42 | PicOrderCnt: 106
DTS: 822 | PTS: 823 | NAL-Type 1: frame_num: 3 // slice_type: 6 // pic_order_cnt_lsb: 40 | PicOrderCnt: 104
DTS: 823 | PTS: 822 | NAL-Type 1: frame_num: 4 // slice_type: 6 // pic_order_cnt_lsb: 38 | PicOrderCnt: 102
DTS: 824 | PTS: 827 | NAL-Type 1: frame_num: 4 // slice_type: 5 // pic_order_cnt_lsb: 48 | PicOrderCnt: 112
DTS: 825 | PTS: 826 | NAL-Type 1: frame_num: 5 // slice_type: 6 // pic_order_cnt_lsb: 46 | PicOrderCnt: 110
I get errors starting at PTS 819.
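For context, the types used in the code below look essentially like this (simplified sketch; the real analyzer carries more state):

struct PictureNAL {
    let bytesWithoutStartCode: [UInt8]   // raw NAL unit with the Annex B start code stripped
}

struct AviH264Analyzer {                 // only the part the decoder needs
    struct GOP {
        let spsNAL: PictureNAL
        let ppsNAL: PictureNAL
        let nalus: [PictureNAL]          // slice NAL units in decoding order, IDR first
        let order: [Int]                 // presentation index (PTS) for each entry in nalus
    }
}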
This is my code:
func decodeGroup(_ group: AviH264Analyzer.GOP, fps: Double) {
    DispatchQueue(label: "decode").async {
        let sps = group.spsNAL.bytesWithoutStartCode
        let pps = group.ppsNAL.bytesWithoutStartCode

        // Build the format description from the raw SPS/PPS (no start codes).
        var formatDesc: CMVideoFormatDescription?
        var status = sps.withUnsafeBufferPointer { spsPtr in
            pps.withUnsafeBufferPointer { ppsPtr in
                let paramSet = [spsPtr.baseAddress!, ppsPtr.baseAddress!]
                let paramSizes = [sps.count, pps.count]
                return paramSet.withUnsafeBufferPointer { paramSetPtr in
                    paramSizes.withUnsafeBufferPointer { paramSizesPtr in
                        CMVideoFormatDescriptionCreateFromH264ParameterSets(allocator: nil,
                                                                            parameterSetCount: 2,
                                                                            parameterSetPointers: paramSetPtr.baseAddress!,
                                                                            parameterSetSizes: paramSizesPtr.baseAddress!,
                                                                            nalUnitHeaderLength: 4,
                                                                            formatDescriptionOut: &formatDesc)
                    }
                }
            }
        }

        // Output callback: convert each decoded image buffer to an NSImage and store it.
        var callback = VTDecompressionOutputCallbackRecord()
        callback.decompressionOutputCallback = { (_, _, status, _, imageBuffer, pts, _) in
            if let imageBuffer {
                let ciImage = CIImage(cvImageBuffer: imageBuffer)
                if let cgImage = CIContext().createCGImage(ciImage, from: ciImage.extent) {
                    let rep = NSBitmapImageRep(cgImage: cgImage)
                    if let imgData = rep.representation(using: .png, properties: [:]), let nsImage = NSImage(data: imgData) {
                        let frameNumber = Int(pts.value) - 1
                        if !VideoBuffer.shared.buffer.map({ $0.frameNumber }).contains(frameNumber) {
                            VideoBuffer.shared.buffer.append(VideoFrame(frameNumber: frameNumber, image: nsImage))
                        }
                    }
                }
            }
        }

        let decoderParameters = NSMutableDictionary()
        let destinationPixelBufferAttributes = NSMutableDictionary()
        destinationPixelBufferAttributes.setValue(
            NSNumber(value: kCVPixelFormatType_32ARGB),
            forKey: kCVPixelBufferPixelFormatTypeKey as String
        )

        var decompressionSession: VTDecompressionSession?
        status = VTDecompressionSessionCreate(allocator: kCFAllocatorDefault,
                                              formatDescription: formatDesc!,
                                              decoderSpecification: decoderParameters,
                                              imageBufferAttributes: destinationPixelBufferAttributes,
                                              outputCallback: &callback,
                                              decompressionSessionOut: &decompressionSession)
        if status != noErr {
            handleStatus(status)
        } else {
            print("DecompressionSession successfully created")
        }

        let nalus = group.nalus
        self.decodeNALUnits(nalus: nalus,
                            order: group.order,
                            fps: fps,
                            formatDesc: formatDesc!,
                            decompressionSession: decompressionSession!)
    }
}
func decodeNALUnits(nalus: [PictureNAL], order: [Int], fps: Double, formatDesc: CMVideoFormatDescription, decompressionSession: VTDecompressionSession) {
    // Concatenate all NAL units of the GOP, each prefixed with its 4-byte big-endian length (AVCC).
    var videoData = [UInt8]()
    var sampleSizeArray = [Int]()
    for nalu in nalus {
        var bytes = nalu.bytesWithoutStartCode
        // the length of the NALU
        var bigLen = CFSwapInt32HostToBig(UInt32(bytes.count))
        bytes.insert(contentsOf: withUnsafeBytes(of: &bigLen, { Array($0) }), at: 0)
        videoData += bytes
        sampleSizeArray.append(bytes.count)
    }

    var blockBuffer: CMBlockBuffer?
    let count = videoData.count
    var status = videoData.withUnsafeMutableBufferPointer { bufferPointer in
        return CMBlockBufferCreateWithMemoryBlock(allocator: kCFAllocatorDefault,
                                                  memoryBlock: bufferPointer.baseAddress!,
                                                  blockLength: count,
                                                  blockAllocator: kCFAllocatorNull,
                                                  customBlockSource: nil,
                                                  offsetToData: 0,
                                                  dataLength: count,
                                                  flags: 0,
                                                  blockBufferOut: &blockBuffer)
    }
    if status != noErr {
        handleStatus(status)
    } else {
        print("CMBlockBufferCreateWithMemoryBlock success")
    }

    // One CMSampleTimingInfo per NAL unit; `order` carries the presentation index (PTS).
    let frameDuration = CMTimeMake(value: 1, timescale: Int32(fps))
    var timing = [CMSampleTimingInfo]()
    for i in 0..<nalus.count {
        let pts = order[i]
        let presentationTime = CMTimeMake(value: Int64(pts), timescale: Int32(fps))
        let timingInfo = CMSampleTimingInfo(duration: frameDuration,
                                            presentationTimeStamp: presentationTime,
                                            decodeTimeStamp: CMTime.invalid)
        timing.append(timingInfo)
    }

    var sampleBuffer: CMSampleBuffer?
    status = CMSampleBufferCreateReady(allocator: kCFAllocatorDefault,
                                       dataBuffer: blockBuffer,
                                       formatDescription: formatDesc,
                                       sampleCount: sampleSizeArray.count,
                                       sampleTimingEntryCount: timing.count,
                                       sampleTimingArray: &timing,
                                       sampleSizeEntryCount: sampleSizeArray.count,
                                       sampleSizeArray: sampleSizeArray,
                                       sampleBufferOut: &sampleBuffer)
    if status != noErr {
        handleStatus(status)
    } else {
        print("CMSampleBufferCreateReady success")
    }

    guard let buffer = sampleBuffer else {
        print("Could not unwrap sampleBuffer!")
        return
    }

    var outputBuffer: CVPixelBuffer?
    status = VTDecompressionSessionDecodeFrame(decompressionSession,
                                               sampleBuffer: buffer,
                                               flags: [._EnableAsynchronousDecompression, ._EnableTemporalProcessing],
                                               frameRefcon: &outputBuffer,
                                               infoFlagsOut: nil)
    if status != noErr {
        print(status)
        handleStatus(status)
    } else {
        print("VTDecompressionSessionDecodeFrame success")
    }
}
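For completeness, handleStatus and VideoBuffer are only small helpers; simplified sketches of them:

import VideoToolbox
import AppKit

func handleStatus(_ status: OSStatus) {
    // Maps the most common VideoToolbox errors to a readable message.
    switch status {
    case kVTVideoDecoderBadDataErr:     print("Error: decoder bad data (\(status))")
    case kVTInvalidSessionErr:          print("Error: invalid session (\(status))")
    case kVTVideoDecoderMalfunctionErr: print("Error: decoder malfunction (\(status))")
    default:                            print("Error: \(status)")
    }
}

struct VideoFrame {
    let frameNumber: Int
    let image: NSImage
}

final class VideoBuffer {
    static let shared = VideoBuffer()
    var buffer = [VideoFrame]()          // decoded frames collected by the output callback
}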