3

I'm trying to get all the text found in an UIImage using VisionKit and get the location of a keyword (if it exists) in the image. So far I've got this:

    var detectedText = ""
    var textRecognitionRequest = VNRecognizeTextRequest(completionHandler: nil)
    let textRecognitionWorkQueue = DispatchQueue(label: "TextRecognitionQueue", qos: .userInitiated, attributes: [], autoreleaseFrequency: .workItem)

    private func recognizeTextInImage(_ image: UIImage?) {
        guard let cgImage = image?.cgImage else { return }

        textRecognitionWorkQueue.async {
            let requestHandler = VNImageRequestHandler(cgImage: cgImage, options: [:])
            do {
                try requestHandler.perform([self.textRecognitionRequest])
            } catch {
                // You should handle errors appropriately in your app.
                print(error)
            }
        }
    } 

And in viewDidLoad:

    override func viewDidLoad() {
        super.viewDidLoad()
        let imgData = object.scannedImage ?? Data()
        recognizeTextInImage(UIImage(data: imgData, scale: 1.0))

        textRecognitionRequest.recognitionLevel = .accurate
        textRecognitionRequest.usesLanguageCorrection = true
        textRecognitionRequest.recognitionLanguages = ["en-US"]
        textRecognitionRequest.customWords = ["KEYWORD"]
        textRecognitionRequest = VNRecognizeTextRequest { (request, error) in
            guard let observations = request.results as? [VNRecognizedTextObservation] else { return }
            for observation in observations {
                guard let topCandidate = observation.topCandidates(1).first else { return }

                self.detectedText += topCandidate.string
                self.detectedText += " "

                if topCandidate.string == "KEYWORD" {
                    let boundingBox = observation.boundingBox
                    guard let imageData = object.scannedImage else { return }
                    let imgSize = UIImage(data: imageData)!.size
                    let rect = CGRect(x: boundingBox.minX * imgSize.width,
                                                   y: boundingBox.minY * imgSize.height,
                                                   width: boundingBox.width * imgSize.width,
                                                   height: boundingBox.height * imgSize.height)
                    print(rect)
                }
            }
        }
    }

But the detection of the boundingBox really slows down the process of finding all the text in the image and it gets really inaccurate and the printing of the rect never gets called.

Is there a better way of doing this?

Andy Jazz
  • 49,178
  • 17
  • 136
  • 220
perteadi
  • 13
  • 13

0 Answers0