I have made an MLModel in Create ML that detects hockey pucks in images. I use the phone's camera to record video, and while it is recording I convert each frame to a CGImage and try to detect pucks in it. When I first hit the memory crashes, I tried removing the trajectory detection I was running at the same time, but that made no difference. When monitoring memory usage at runtime, my app uses a small and consistent amount; it is "Other Processes" that goes over the limit, which is quite confusing. I also removed a for loop that filtered out objects with low confidence (below 0.5, as sketched below), but that had no effect either.
Being new to MLModel and machine learning, can anybody steer me in the right direction? Please let me know if any more details are needed or if I missed something. I will attach all of the code, since it is only about 100 lines and may be important for context; the initializeCaptureSession and captureOutput methods are probably the ones to look at.
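For reference, the confidence filter I removed looked roughly like this (reconstructed from memory, so the variable names are approximate; "pucks" is the detection results array from the code below):

// Reconstructed sketch of the filter I removed; removing it made no difference.
let confidentPucks = pucks.filter { $0.confidence > 0.5 }

And here is the full code: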
import UIKit
import AVFoundation
import ImageIO
import Vision
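// Captures video and runs the puck detector (plus a trajectory request) on every frame.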
class ViewController: UIViewController, AVCaptureVideoDataOutputSampleBufferDelegate, AVCaptureAudioDataOutputSampleBufferDelegate {
    var cameraPreviewLayer: AVCaptureVideoPreviewLayer?
    var camera: AVCaptureDevice?
    var microphone: AVCaptureDevice?
    let session = AVCaptureSession()
    var videoDataOutput = AVCaptureVideoDataOutput()
    var audioDataOutput = AVCaptureAudioDataOutput()

    @IBOutlet var trajectoriesLabel: UILabel!
    @IBOutlet var pucksLabel: UILabel!
    override func viewDidLoad() {
        super.viewDidLoad()
        initializeCaptureSession()
    }
    // Lazily create a single instance of VNDetectTrajectoriesRequest.
    private lazy var request: VNDetectTrajectoriesRequest = {
        let request = VNDetectTrajectoriesRequest(frameAnalysisSpacing: .zero,
                                                  trajectoryLength: 10,
                                                  completionHandler: completionHandler)
        // Accept objects of any normalized radius up to half the frame.
        request.objectMinimumNormalizedRadius = 0.0
        request.objectMaximumNormalizedRadius = 0.5
        return request
    }()
    // AVCaptureVideoDataOutputSampleBufferDelegate callback.
    func captureOutput(_ output: AVCaptureOutput,
                       didOutput sampleBuffer: CMSampleBuffer,
                       from connection: AVCaptureConnection) {
        do {
            // Handler for the trajectory request, fed directly from the sample buffer.
            let requestHandler = VNImageRequestHandler(cmSampleBuffer: sampleBuffer)

            // Convert the frame to a CGImage for the puck detector.
            guard let pixelBuffer = CMSampleBufferGetImageBuffer(sampleBuffer) else {
                print("cannot make pixelbuffer for image conversion")
                return
            }
            CVPixelBufferLockBaseAddress(pixelBuffer, .readOnly)
            // Unlock on every exit path, including the early returns below.
            defer { CVPixelBufferUnlockBaseAddress(pixelBuffer, .readOnly) }

            let baseAddress = CVPixelBufferGetBaseAddress(pixelBuffer)
            let width = CVPixelBufferGetWidth(pixelBuffer)
            let height = CVPixelBufferGetHeight(pixelBuffer)
            let bytesPerRow = CVPixelBufferGetBytesPerRow(pixelBuffer)
            let colorSpace = CGColorSpaceCreateDeviceRGB()
            let bitmapInfo = CGBitmapInfo(rawValue: CGImageAlphaInfo.premultipliedFirst.rawValue | CGBitmapInfo.byteOrder32Little.rawValue)
            guard let context = CGContext(data: baseAddress, width: width, height: height, bitsPerComponent: 8, bytesPerRow: bytesPerRow, space: colorSpace, bitmapInfo: bitmapInfo.rawValue) else {
                print("cannot make context for image conversion")
                return
            }
            guard let cgImage = context.makeImage() else {
                print("cannot make cgimage for image conversion")
                return
            }

            // Run the Create ML puck detector on this frame.
            // (Note: the model is instantiated on every frame here.)
            let model = try VNCoreMLModel(for: PucksV7(configuration: MLModelConfiguration()).model)
            let puckRequest = VNCoreMLRequest(model: model)
            let handler = VNImageRequestHandler(cgImage: cgImage, options: [:])
            try handler.perform([puckRequest])
            guard let pucks = puckRequest.results as? [VNDetectedObjectObservation] else {
                print("Could not convert detected pucks")
                return
            }
            DispatchQueue.main.async {
                self.pucksLabel.text = "Pucks: \(pucks.count)"
            }

            // Run the trajectory request on the same frame.
            try requestHandler.perform([request])
        } catch {
            print(error)
        }
    }
    func completionHandler(request: VNRequest, error: Error?) {
        guard let observations = request.results as? [VNTrajectoryObservation] else { return }
        // UI updates must happen on the main thread; this is called on the video queue.
        DispatchQueue.main.async {
            self.trajectoriesLabel.text = "Trajectories: \(observations.count)"
        }
    }
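    // Configure the capture session: camera input, 32BGRA video output, and the preview layer.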
    func initializeCaptureSession() {
        session.sessionPreset = .hd1920x1080
        camera = AVCaptureDevice.default(for: .video)
        microphone = AVCaptureDevice.default(for: .audio)
        do {
            session.beginConfiguration()

            // Add the camera input.
            let cameraCaptureInput = try AVCaptureDeviceInput(device: camera!)
            if session.canAddInput(cameraCaptureInput) {
                session.addInput(cameraCaptureInput)
            }

            // Add the video data output, delivering 32BGRA frames on a serial queue.
            let queue = DispatchQueue(label: "output")
            if session.canAddOutput(videoDataOutput) {
                videoDataOutput.alwaysDiscardsLateVideoFrames = true
                videoDataOutput.videoSettings = [kCVPixelBufferPixelFormatTypeKey as String: kCVPixelFormatType_32BGRA]
                videoDataOutput.setSampleBufferDelegate(self, queue: queue)
                session.addOutput(videoDataOutput)
            }

            // Always process the frames.
            let captureConnection = videoDataOutput.connection(with: .video)
            captureConnection?.isEnabled = true

            // Lock and unlock the device (no settings are changed in between at the moment).
            do {
                try camera!.lockForConfiguration()
                camera!.unlockForConfiguration()
            } catch {
                print(error)
            }

            session.commitConfiguration()

            // Show the camera feed behind the labels.
            cameraPreviewLayer = AVCaptureVideoPreviewLayer(session: session)
            cameraPreviewLayer?.videoGravity = .resizeAspectFill
            cameraPreviewLayer?.frame = view.bounds
            cameraPreviewLayer?.connection?.videoOrientation = .landscapeRight
            view.layer.insertSublayer(cameraPreviewLayer!, at: 0)

            // startRunning() blocks, so call it off the main thread.
            DispatchQueue.global(qos: .background).async {
                self.session.startRunning()
            }
        } catch {
            print(error.localizedDescription)
        }
    }
}