I'm trying to get all the text found in an UIImage
using VisionKit
and get the location of a keyword (if it exists) in the image. So far I've got this:
var detectedText = ""
var textRecognitionRequest = VNRecognizeTextRequest(completionHandler: nil)
let textRecognitionWorkQueue = DispatchQueue(label: "TextRecognitionQueue", qos: .userInitiated, attributes: [], autoreleaseFrequency: .workItem)
private func recognizeTextInImage(_ image: UIImage?) {
guard let cgImage = image?.cgImage else { return }
textRecognitionWorkQueue.async {
let requestHandler = VNImageRequestHandler(cgImage: cgImage, options: [:])
do {
try requestHandler.perform([self.textRecognitionRequest])
} catch {
// You should handle errors appropriately in your app.
print(error)
}
}
}
And in viewDidLoad
:
override func viewDidLoad() {
super.viewDidLoad()
let imgData = object.scannedImage ?? Data()
recognizeTextInImage(UIImage(data: imgData, scale: 1.0))
textRecognitionRequest.recognitionLevel = .accurate
textRecognitionRequest.usesLanguageCorrection = true
textRecognitionRequest.recognitionLanguages = ["en-US"]
textRecognitionRequest.customWords = ["KEYWORD"]
textRecognitionRequest = VNRecognizeTextRequest { (request, error) in
guard let observations = request.results as? [VNRecognizedTextObservation] else { return }
for observation in observations {
guard let topCandidate = observation.topCandidates(1).first else { return }
self.detectedText += topCandidate.string
self.detectedText += " "
if topCandidate.string == "KEYWORD" {
let boundingBox = observation.boundingBox
guard let imageData = object.scannedImage else { return }
let imgSize = UIImage(data: imageData)!.size
let rect = CGRect(x: boundingBox.minX * imgSize.width,
y: boundingBox.minY * imgSize.height,
width: boundingBox.width * imgSize.width,
height: boundingBox.height * imgSize.height)
print(rect)
}
}
}
}
But the detection of the boundingBox
really slows down the process of finding all the text in the image and it gets really inaccurate and the printing of the rect never gets called.
Is there a better way of doing this?