0

I am following the tutorial to extract text from images at:

https://cloud.google.com/functions/docs/tutorials/ocr?authuser=1

However, I do not wish to translate the text; I only wish to detect the text and save it.

The tutorial implements 3 functions:

gcloud beta functions deploy ocr-extract --trigger-bucket [YOUR_IMAGE_BUCKET_NAME] --entry-point processImage

gcloud beta functions deploy ocr-translate --trigger-topic [YOUR_TRANSLATE_TOPIC_NAME] --entry-point translateText

gcloud beta functions deploy ocr-save --trigger-topic [YOUR_RESULT_TOPIC_NAME] --entry-point saveResult

I just wish to detect text and save the text but I could not remove the translation portion of the code below:

/**
 * Runs Google Vision text detection on an image stored in Cloud Storage,
 * asks the Translate API which language the extracted text is in, and then
 * publishes one message per language listed in `config.TO_LANG`.
 *
 * A message goes to `config.RESULT_TOPIC` when its target language equals the
 * detected language, and to `config.TRANSLATE_TOPIC` otherwise.
 *
 * @param {string} bucketName Cloud Storage bucket name.
 * @param {string} filename Cloud Storage file name.
 * @returns {Promise} Settles with the combined (`Promise.all`) outcome of
 * every `publishResult` call.
 */
function detectText (bucketName, filename) {
  // Shared between the two continuation steps below.
  let extractedText;

  // Continuation 1: keep the first annotation's description (or an empty
  // string when there is none) and request language detection for it.
  const onTextDetected = ([ocrResponse]) => {
    const firstAnnotation = ocrResponse.textAnnotations[0];
    if (firstAnnotation) {
      extractedText = firstAnnotation.description;
    } else {
      extractedText = '';
    }
    console.log(`Extracted text from image (${extractedText.length} chars)`);
    return translate.detect(extractedText);
  };

  // Continuation 2: fan out one Pub/Sub message per configured language.
  const onLanguageDetected = ([languageResult]) => {
    // The detection may arrive wrapped in an array; unwrap its first entry.
    const detected = Array.isArray(languageResult) ? languageResult[0] : languageResult;
    console.log(`Detected language "${detected.language}" for ${filename}`);

    const publications = config.TO_LANG.map((targetLang) => {
      const alreadyInTargetLang = detected.language === targetLang;
      const topicName = alreadyInTargetLang ? config.RESULT_TOPIC : config.TRANSLATE_TOPIC;

      return publishResult(topicName, {
        text: extractedText,
        filename: filename,
        lang: targetLang,
        from: detected.language
      });
    });

    return Promise.all(publications);
  };

  console.log(`Looking for text in image ${filename}`);
  const request = { source: { imageUri: `gs://${bucketName}/${filename}` } };

  return vision.textDetection(request)
    .then(onTextDetected)
    .then(onLanguageDetected);
}

After that, I just wish to save the detected text to a file, as the code below shows:

/**
 * Saves the data packet to a file in GCS. Triggered from a message on a Pub/Sub
 * topic.
 *
 * @param {object} event The Cloud Functions event.
 * @param {object} event.data The Cloud Pub/Sub Message object.
 * @param {string} event.data.data The "data" property of the Cloud Pub/Sub
 * Message. This property will be a base64-encoded string that you must decode.
     */
exports.saveResult = (event) => {
  const pubsubMessage = event.data;
  const jsonStr = Buffer.from(pubsubMessage.data, 'base64').toString();
  const payload = JSON.parse(jsonStr);

  return Promise.resolve()
    .then(() => {
      if (!payload.text) {
        throw new Error('Text not provided. Make sure you have a "text" property in your request');
      }
      if (!payload.filename) {
        throw new Error('Filename not provided. Make sure you have a "filename" property in your request');
      }
      if (!payload.lang) {
        throw new Error('Language not provided. Make sure you have a "lang" property in your request');
      }

      console.log(`Received request to save file ${payload.filename}`);

      const bucketName = config.RESULT_BUCKET;
      const filename = renameImageForSave(payload.filename, payload.lang);
      const file = storage.bucket(bucketName).file(filename);

      console.log(`Saving result to ${filename} in bucket ${bucketName}`);

      return file.save(payload.text);
    })
    .then(() => {
      console.log(`File saved.`);
    });
};
enle lin
  • 1,664
  • 8
  • 14
Cristiana SP
  • 143
  • 2
  • 9
  • I'd follow these instead: https://github.com/googleapis/nodejs-vision/tree/master/samples I can help if you're having issues with those samples. – Torry Yang Jun 26 '18 at 21:37
  • Thanks, but I do not wish to install the libraries locally; I wish to run the translation as a function in Google Cloud... I tried to use the code in the link that you sent, but since I am not a JS expert I was not able to do it. – Cristiana SP Jul 02 '18 at 14:38
  • the tutorial and the samples use the same libraries and can both run on google cloud. would you prefer to do this with Go, Java, .NET, PHP, Python or Ruby? – Torry Yang Jul 02 '18 at 17:12
  • Torry Yang thanks for your help. Python – Cristiana SP Jul 03 '18 at 18:52
  • Are you still trying to use this code or you moved to Python? @CristianaS.Parada – Mangu Aug 21 '18 at 07:42
  • @Mangu still trying to use this code. – Cristiana SP Aug 22 '18 at 15:55

1 Answer

3

The tutorials there are based on a much more 'complex' setup (they also use Pub/Sub and Translate), whereas you only want to extract the text. With the following code you should be able to do that:

'use strict';

// Google Cloud client libraries.
const Storage = require('@google-cloud/storage');
const Vision = require('@google-cloud/vision');

// Placeholders: replace with your own project, bucket and image names.
const projectId = 'YOUR_PROJECT_ID';
const bucketName = 'YOUR_BUCKET';
const srcFilename = 'YOUR_IMAGE.jpg';

// Each client receives its own options object, both scoped to the project above.
const storage = new Storage({ projectId });
const vision = new Vision.ImageAnnotatorClient({ projectId });
exports.processImage = (req, res) => {
    let text;
    vision.textDetection(`gs://${bucketName}/${srcFilename}`)
        .then(([detections]) => {
            const annotation = detections.textAnnotations[0];
            text = annotation ? annotation.description : '';
            console.log(`Extracted text: ${text}`);
            console.log(`Extracted text from image (${text.length} chars)`);
        }).catch(vis_err => {
            console.error("Vision error:" , vis_err);
        });
    res.status(200).send("OK");
}

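My dependencies, in my package.json file (note that the code above also requires `@google-cloud/storage`, so add it here as well if you keep the `Storage` client):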

  "dependencies": {
    "@google-cloud/vision": "0.21.0"
  },

You can later on extend this to save this text to Storage, if you wish to. There are other tutorials on how to do so.

Mangu
  • 3,160
  • 2
  • 25
  • 42