
How can I adapt Google's Swift sample code for FACE_DETECTION and LABEL_DETECTION to use the OCR API (TEXT_DETECTION)? So far I have not found any sample code or explanation of how to use Google's OCR on iOS. I changed the type of the API request to TEXT_DETECTION, but it does not give a reply. Any help?

 import UIKit
 import SwiftyJSON

 class GoogleCloudVisionOCR {

        let session = URLSession.shared
        var googleAPIKey = ""
        var googleURL: URL {
            return URL(string: "https://vision.googleapis.com/v1/images:annotate?key=\(googleAPIKey)")!
        }
        var textFromImageArray: [String]?


        public func GetTextFromImage(imageURLString: String, handler: @escaping (String) -> Void) {
            guard let url = URL(string: imageURLString) else { print("no URL in TextFromImageRequest.GetTextFromImage"); return}
            // Note: Data(contentsOf:) blocks while it downloads the image,
            // so call this method from a background queue
            let data = try? Data(contentsOf: url)
            if let unwrappedData = data {
                guard let image = UIImage(data: unwrappedData) else { print("no image from URL in TextFromImageRequest.GetTextFromImage"); return}

                let binaryImageData = base64EncodeImage(image)
                createRequest(with: binaryImageData, handler: { (result) in
                    //DEBUG PRINT print("RESULT = \(result)")

                    handler(result)
                })
            } else { print("unwrapped data nil in TextFromImageRequest.GetTextFromImage") }
        }


        // Base64 encode the image, resizing it first if it exceeds the API size limit
        private func base64EncodeImage(_ image: UIImage) -> String {
            var imagedata = UIImagePNGRepresentation(image)

            // Resize the image if it exceeds the 2MB API limit
            if let count = imagedata?.count, count > 2097152 {
                let oldSize: CGSize = image.size
                let newSize: CGSize = CGSize(width: 800, height: oldSize.height / oldSize.width * 800)
                imagedata = resizeImage(newSize, image: image)
            }

            return imagedata!.base64EncodedString(options: .endLineWithCarriageReturn)
        }

        private func resizeImage(_ imageSize: CGSize, image: UIImage) -> Data {
            UIGraphicsBeginImageContext(imageSize)
            image.draw(in: CGRect(x: 0, y: 0, width: imageSize.width, height: imageSize.height))
            let newImage = UIGraphicsGetImageFromCurrentImageContext()
            let resizedImage = UIImagePNGRepresentation(newImage!)
            UIGraphicsEndImageContext()
            return resizedImage!
        }

        private func createRequest(with imageBase64: String, handler: @escaping (String) -> Void) {
            // Create our request URL

            var request = URLRequest(url: googleURL)
            request.httpMethod = "POST"
            request.addValue("application/json", forHTTPHeaderField: "Content-Type")
            request.addValue(Bundle.main.bundleIdentifier ?? "", forHTTPHeaderField: "X-Ios-Bundle-Identifier")

            // Build our API request. Note that `requests` is an array of
            // request objects, and the feature type for OCR is TEXT_DETECTION
            // (analyzeResults below reads the `textAnnotations` it returns)
            let jsonRequest: [String: Any] = [
                "requests": [
                    [
                        "image": [
                            "content": imageBase64
                        ],
                        "features": [
                            [
                                "type": "TEXT_DETECTION",
                                "maxResults": 10
                            ]
                        ]
                    ]
                ]
            ]

            // Serialize the JSON
            guard let data = try? JSONSerialization.data(withJSONObject: jsonRequest) else {
                return
            }

            request.httpBody = data



            // Run the request on a background thread
            DispatchQueue.global().async { self.runRequestOnBackgroundThread(request, handler: { (result) in
                handler(result)
            }) }

        }

        private func runRequestOnBackgroundThread(_ request: URLRequest, handler: @escaping (String) -> Void) {
            // run the request

            let task: URLSessionDataTask = session.dataTask(with: request) { (data, response, error) in
                guard let data = data, error == nil else {
                    print(error?.localizedDescription ?? "")
                    return
                }

                self.analyzeResults(data, handler: { (result) in
                    handler(result)
                })
            }

            task.resume()
        }

        private func analyzeResults(_ dataToParse: Data, handler: @escaping (String) -> Void) {

            var textArray: [String] = []
            // Hop to the main thread so the handler can update UI directly
            DispatchQueue.main.async(execute: {

                // Use SwiftyJSON to parse results
                let json = JSON(data: dataToParse)
                let errorObj: JSON = json["error"]
                //Check for errors
                if errorObj.dictionaryValue != [:] {
                    print("Error code \(errorObj["code"]): \(errorObj["message"])")
                    handler("")   // report the failure instead of staying silent
                } else {

                    //Parse the response
                    let responses: JSON = json["responses"][0]

                    //Get text
                    let textAnnotations: JSON = responses["textAnnotations"]
                    //DEBUG PRINT print(textAnnotations)

                    let numTextAnnos: Int = textAnnotations.count

                    if numTextAnnos > 0 {
                        for index in 0..<numTextAnnos {
                            let text = textAnnotations[index]["description"].stringValue
                            textArray.append(text)
                        }
                        handler(textArray.joined(separator: " "))
                    } else {
                        // No text found: still call the handler so the caller
                        // always gets a reply
                        handler("")
                    }
                }
            })
        }   

}

user3066891
  • The processes for TEXT_DETECTION and FACE_DETECTION are essentially the same from the client's perspective. There's only one URL `https://vision.googleapis.com/v1/images:annotate` which should be sent a `POST` request. The main difference is the content of the request and response. This is described in the [**images.annotate**](https://cloud.google.com/vision/docs/reference/rest/v1/images/annotate) documentation. Is the POST request successfully sent from the iOS device? What is the response? Can you reproduce this using the API explorer in the previous link? – Nicholas May 10 '17 at 14:50
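
To make the last of those questions easy to check, one way is to print the raw response body; a quick debugging sketch (YOUR_KEY is a placeholder, and the request body would be the serialized JSON from the question):

    import Foundation

    // Debugging sketch: print the raw JSON the Vision API returns,
    // so any `error` object in the response becomes visible
    var request = URLRequest(url: URL(string: "https://vision.googleapis.com/v1/images:annotate?key=YOUR_KEY")!)
    request.httpMethod = "POST"
    request.addValue("application/json", forHTTPHeaderField: "Content-Type")
    // request.httpBody = ... the serialized JSON request body goes here

    let task = URLSession.shared.dataTask(with: request) { data, _, error in
        if let error = error { print("transport error: \(error)") }
        if let data = data, let raw = String(data: data, encoding: .utf8) {
            print("raw response: \(raw)")
        }
    }
    task.resume()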

1 Answer


This function from my project might help you.

import UIKit

func applyOCR(image: UIImage, completionHandler: @escaping (_ error: Error?, _ data: String?) -> Void) {

    // JPEG-compress the image to keep the payload small, then base64 encode it
    let imageData = UIImageJPEGRepresentation(image, 0.2)!
    let base64 = imageData.base64EncodedString(options: .endLineWithCarriageReturn)

    // DOCUMENT_TEXT_DETECTION returns one fullTextAnnotation with all of the
    // recognized text; use TEXT_DETECTION instead for word-level annotations
    let body = "{ 'requests': [ { 'image': { 'content': '\(base64)' }, 'features': [ { 'type': 'DOCUMENT_TEXT_DETECTION' } ],  'imageContext': {'languageHints': ['en']} } ] }"

    // Replace {Your Key} with your API key
    let url = URL(string: "https://vision.googleapis.com/v1/images:annotate?key={Your Key}")

    var request = URLRequest(url: url!,
                             cachePolicy: .reloadIgnoringLocalAndRemoteCacheData,
                             timeoutInterval: 30.0)
    request.httpMethod = "POST"
    request.setValue("application/json", forHTTPHeaderField: "Content-Type")
    request.httpBody = body.data(using: .utf8)

    let task = URLSession.shared.dataTask(with: request) { data, _, error in

        if let error = error {
            print(error.localizedDescription)
            completionHandler(error, nil)
            return
        }

        if let data = data {
            do {
                // let raw = String(data: data, encoding: .utf8) ?? "Data could not be printed"
                // print(raw)

                let json = try JSONSerialization.jsonObject(with: data, options: .allowFragments) as! [String: Any]

                // Drill down to responses[0].fullTextAnnotation.text
                if let responses = json["responses"] as? [[String: Any]],
                    let annotation = responses.first?["fullTextAnnotation"] as? [String: Any],
                    let text = annotation["text"] as? String {

                    completionHandler(nil, text)
                    return
                }

                // No text in the response: report it back as an error
                let error = NSError(domain: "", code: 401,
                                    userInfo: [NSLocalizedDescriptionKey: "Invalid access token"])
                completionHandler(error, nil)
                return

            } catch {
                print("error parsing \(error)")
                completionHandler(error, nil)
                return
            }
        }
    }

    task.resume()
}
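
For example, a hypothetical call site, where `someImage` stands for whatever UIImage you want to read:

    // Hypothetical usage; `someImage` is any UIImage you want to OCR
    applyOCR(image: someImage) { error, text in
        if let text = text {
            print("Recognized text:\n\(text)")
        } else if let error = error {
            print("OCR failed: \(error.localizedDescription)")
        }
    }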

You can also use the Google Mobile Vision library, which is free and very easy to use: https://developers.google.com/vision/ios/text-overview
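
If you go that route, the on-device flow looks roughly like the sketch below; it assumes the GoogleMobileVision CocoaPod and its GMVDetector API, so double-check the names against the linked docs:

    import UIKit
    import GoogleMobileVision

    // Rough sketch of on-device text recognition with GoogleMobileVision;
    // GMVDetectorTypeText and GMVTextBlockFeature come from that SDK
    func recognizeTextOnDevice(in image: UIImage) -> String {
        let detector = GMVDetector(ofType: GMVDetectorTypeText, options: nil)
        let blocks = detector?.features(in: image, options: nil) as? [GMVTextBlockFeature] ?? []
        return blocks.compactMap { $0.value }.joined(separator: "\n")
    }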

Faisal Khalid