I am using the Vision framework to detect text boxes in an ARKit session. My problem is accessing the frame
in order to perform a hit test once I have detected the boxes.
/// Builds the Vision text-rectangle request and stores it in `requests`.
///
/// The request is configured to report per-character boxes and delivers its
/// results to `detectTextHandler(request:error:)`.
func startTextDetection() {
    let detector = VNDetectTextRectanglesRequest(completionHandler: detectTextHandler)
    detector.reportCharacterBoxes = true
    requests = [detector]
}
/// Completion handler for the text-rectangle request: adds an anchor to the AR
/// session for each detected text box.
///
/// For every `VNTextObservation` in the results, the centre of the box is
/// hit-tested against an `ARFrame` and, if a feature point is hit, an
/// `ARAnchor` is added at the first hit's world transform.
///
/// - Parameters:
///   - request: The finished Vision request whose `results` hold the observations.
///   - error: The error reported by Vision, if any; it is logged when there are no results.
func detectTextHandler(request: VNRequest, error: Error?) {
    guard let observations = request.results else {
        print("no result")
        if let error = error {
            print(error)
        }
        return
    }

    // NOTE(review): this uses the session's most recent frame. When the request
    // is performed synchronously from `session(_:didUpdate:)` this should be the
    // frame that was analysed, but that is not guaranteed here — confirm, or
    // create the request per frame with a closure that captures the frame.
    guard let frame = sceneView.session.currentFrame else {
        print("no current frame")
        return
    }

    // Drop anything that is not a text observation instead of carrying optionals around.
    let boxes = observations.compactMap { $0 as? VNTextObservation }

    for box in boxes {
        // Hit-test the centre of the box (midpoint of two opposite corners).
        // Vision reports normalized coordinates with the origin at the lower-left,
        // while `ARFrame.hitTest` expects normalized image coordinates with the
        // origin at the top-left, so the y axis is flipped.
        let center = CGPoint(
            x: (box.topRight.x + box.bottomLeft.x) / 2,
            y: 1 - (box.topRight.y + box.bottomLeft.y) / 2
        )

        // `hitTest` returns an array of results; skip boxes that hit nothing.
        guard let hit = frame.hitTest(center, types: .featurePoint).first else {
            continue
        }

        sceneView.session.add(anchor: ARAnchor(transform: hit.worldTransform))
    }
}
Ideally, I would pass the frame to the completion handler from the ARSession
delegate method. However, although the documentation says I can pass a completion handler here, I have not found a way to do it.
/// `ARSession` delegate method: runs the stored Vision requests on the camera
/// image of each updated frame.
///
/// - Parameters:
///   - session: The session that produced the update.
///   - frame: The updated frame whose `capturedImage` is passed to Vision.
func session(_ session: ARSession, didUpdate frame: ARFrame) {
    // The frame's captured pixel buffer is the input image for Vision.
    let handler = VNImageRequestHandler(
        cvPixelBuffer: frame.capturedImage,
        orientation: .up,
        options: [:]
    )

    do {
        try handler.perform(requests)
    } catch {
        print(error)
    }
}
I could keep a dictionary and look the frame up, but that is not really elegant and is prone to bugs and leaks. I would rather pass the relevant frame at the point where I request the text detection.
Any ideas?