0

I am using Firebase Functions to crop a certain area of a PDF and convert it to an image using Ghostscript [the wrapper https://www.npmjs.com/package/node-gs and a compiled version of gs v9.2 "https://github.com/sina-masnadi/node-gs/tarball/master"].

and this is the code i am using :

const functions = require('firebase-functions');
const { Storage } = require('@google-cloud/storage');
const gcs = new Storage();
const spawn = require('child-process-promise').spawn;
const path = require('path');
const os = require('os');
const fs = require('fs');
var gs = require('gs');


// Upper bounds passed to ImageMagick's `-thumbnail WxH>` geometry when the
// thumbnail is generated from the first preview image.
const THUMB_MAX_HEIGHT = 200;
const THUMB_MAX_WIDTH = 200;
// Appended to the PDF's base name to build the local thumbnail file name.
const THUMB_SUFFIX = '-thumb';

//This function triggers whenever any PDF is uploaded to Firebase Storage
//and attempts to generate two cropped preview JPEGs and a thumbnail from it.

exports.makePreviews = functions.storage.object().onFinalize(async (object, event) => {

  //Checking for pdf files
  if (!object.name.endsWith('.pdf')) return false;

  const filePath = object.name;

  //slicing name and path
  const splitFileName = object.name.split(".");
  console.log(splitFileName);
  const fileID = splitFileName;

  //creating temporary path strings for gcp file system
  const fileName = path.basename(filePath);
  const tempFilePath = path.join(os.tmpdir(), fileName);

  const newName1 = path.basename(filePath, '.pdf') + '01.jpeg';
  const tempNewPath1 = path.join(os.tmpdir(), newName1);

  const newName2 = path.basename(filePath, '.pdf') + '02.jpeg';
  const tempNewPath2 = path.join(os.tmpdir(), newName2);

  const thumbName = path.basename(filePath, '.pdf') + THUMB_SUFFIX + '.jpeg';
  const tempThumbPath = path.join(os.tmpdir(), thumbName);


  //downloading file from firebase storage
  const bucket = gcs.bucket(object.bucket);

  return bucket.file(filePath).download({
    destination: tempFilePath
  }).then(async () => {
    console.log('PDF downloaded locally to', tempFilePath);

    //generating two preview JPEGS
    await new Promise((resolve, reject) => {
      gs()
        .safer()
        .batch()
        .nopause()
        .option('-dTextAlphaBits=4')
        .option('-dGraphicsAlphaBits=4')
        .option('-dDEVICEWIDTHPOINTS=238')
        .option('-dDEVICEHEIGHTPOINTS=149.5')
        .option('-dFIXEDMEDIA')
        .res(600)
        .option('-dDownScaleFactor=2')
        .executablePath('gs')
        .device('jpeg')
        .output(tempNewPath2)
        .option('-c "<</PageOffset[-308.5 40]>> setpagedevice"')
        .option('-sPDFPassword=01011977')
        .input(tempFilePath)
        .exec((err, stdout, stderr) => {
          if (!err) {
            console.log('Part One Exceuted');
            bucket.upload(tempNewPath1, {
              destination: 'files/' + fileID + '.jpeg'
            }).then(() => {
              console.log('stdout', stdout);
              console.log('stderr', stderr);
            }).catch(err => {
              console.log(err);
            });
            resolve();
          } else {
            console.log('gs error:', err);
            reject(err);
          }
        });
    });

    await new Promise((resolve, reject) => {
      gs()
        .safer()
        .batch()
        .nopause()
        .option('-dTextAlphaBits=4')
        .option('-dGraphicsAlphaBits=4')
        .option('-dDEVICEWIDTHPOINTS=238')
        .option('-dDEVICEHEIGHTPOINTS=149.5')
        .option('-dFIXEDMEDIA')
        .res(600)
        .option('-dDownScaleFactor=2')
        .executablePath('gs')
        .device('jpeg')
        .output(tempNewPath2)
        .option('-c "<</PageOffset[-308.5 40]>> setpagedevice"')
        .option('-sPDFPassword=01011977')
        .input(tempFilePath)
        .exec((err, stdout, stderr) => {
          if (!err) {
            console.log('gs Part two excuted');
            bucket.upload(tempNewPath1, {
              destination: 'files/' + fileID + '-2.jpeg'
            }).then(() => {
              console.log('stdout', stdout);
              console.log('stderr', stderr);
            })
              .catch(err => {
                console.log(err);
              });
            resolve();
          } else {
            console.log('gs error:', err);
            reject(err);
          }
        });
    });

    //generating thumbnail from the first JPEG
    return spawn('convert', [tempNewPath1, '-thumbnail', `${THUMB_MAX_WIDTH}x${THUMB_MAX_HEIGHT}>`, tempThumbPath], {
      capture: ['stdout', 'stderr']
    });

  }).then(async () => {
    console.log('PNG created at', tempNewPath1 + 'and' + tempNewPath2);
    console.log('Thumbnail created at', tempThumbPath);

    //uploading the files back to firebase storage
    return bucket.upload(tempThumbPath, {
      destination: 'files/' + fileID + 'thumb.jpeg'
    });


  }).then(() => {
    //once the files have been uploaded delete the local temporary 
    //files to free up disk space.
    fs.unlinkSync(tempNewPath1);
    fs.unlinkSync(tempNewPath2);
    fs.unlinkSync(tempThumbPath);
    return fs.unlinkSync(tempFilePath);
  }).catch((err) => {
    console.log('exception:', err);
    return err;
  });
});

After deploying the above code, this is the log:

[ 'PAN_01011977', 'pdf' ]

PDF downloaded locally to /tmp/PAN_01011977.pdf

gs command: -dSAFER,-dBATCH,-dNOPAUSE,-dTextAlphaBits=4,-dGraphicsAlphaBits=4,-dDEVICEWIDTHPOINTS=238,-dDEVICEHEIGHTPOINTS=149.5,-dFIXEDMEDIA,-r600,-dDownScaleFactor=2,-sDEVICE=jpeg,-sOutputFile=/tmp/PAN_0101197702.jpeg,-c "<</PageOffset[-308.5 40]>> setpagedevice",-sPDFPassword=01011977,/tmp/PAN_01011977.pdf

Part One Exceuted

gs command: -dSAFER,-dBATCH,-dNOPAUSE,-dTextAlphaBits=4,-dGraphicsAlphaBits=4,-dDEVICEWIDTHPOINTS=238,-dDEVICEHEIGHTPOINTS=149.5,-dFIXEDMEDIA,-r600,-dDownScaleFactor=2,-sDEVICE=jpeg,-sOutputFile=/tmp/PAN_0101197702.jpeg,-c "<</PageOffset[-308.5 40]>> setpagedevice",-sPDFPassword=01011977,/tmp/PAN_01011977.pdf

{ Error: ENOENT: no such file or directory, stat '/tmp/PAN_0101197701.jpeg'
  errno: -2,
  code: 'ENOENT',
  syscall: 'stat',
  path: '/tmp/PAN_0101197701.jpeg' }

gs Part two excuted

{ Error: ENOENT: no such file or directory, stat '/tmp/PAN_0101197701.jpeg'
  errno: -2,
  code: 'ENOENT',
  syscall: 'stat',
  path: '/tmp/PAN_0101197701.jpeg' }

and the error :

and the error log

exception: { ChildProcessError: `convert /tmp/PAN_0101197701.jpeg -thumbnail 200x200> /tmp/PAN_01011977-thumb.jpeg` failed with code 1
    at ChildProcess.<anonymous> (/srv/node_modules/child-process-promise/lib/index.js:132:23)
    at emitTwo (events.js:126:13)
    at ChildProcess.emit (events.js:214:7)
    at maybeClose (internal/child_process.js:915:16)
    at Process.ChildProcess._handle.onexit (internal/child_process.js:209:5)
  name: 'ChildProcessError',
  code: 1,
  childProcess: 
   ChildProcess {
     domain: 
      Domain {
        domain: null,
        _events: [Object],
        _eventsCount: 1,
        _maxListeners: undefined,
        members: [Array] },
     _events: { error: [Function], close: [Function] },
     _eventsCount: 2,
     _maxListeners: undefined,
     _closesNeeded: 3,
     _closesGot: 3,
     connected: false,
     signalCode: null,
     exitCode: 1,
     killed: false,
     spawnfile: 'convert',
     _handle: null,
     spawnargs: 
      [ 'convert',
        '/tmp/PAN_0101197701.jpeg',
        '-thumbnail',
        '200x200>',
        '/tmp/PAN_01011977-thumb.jpeg' ],
     pid: 14,
     stdin: 
      Socket {
        connecting: false,
        _hadError: false,
        _handle: null,
        _parent: null,
        _host: null,
        _readableState: [Object],
        readable: false,
        domain: [Object],
        _events: [Object],
        _eventsCount: 2,
        _maxListeners: undefined,
        _writableState: [Object],
        writable: false,
        allowHalfOpen: false,
        _bytesDispatched: 0,
        _sockname: null,
        _pendingData: null,
        _pendingEncoding: '',
        server: null,
        _server: null,
        _idleNext: null,
        _idlePrev: null,
        _idleTimeout: -1,
        [Symbol(asyncId)]: 4540,
        [Symbol(bytesRead)]: 0 },
     stdout: 
      Socket {
        connecting: false,
        _hadError: false,
        _handle: null,
        _parent: null,
        _host: null,
        _readableState: [Object],
        readable: false,
        domain: [Object],
        _events: [Object],
        _eventsCount: 3,
        _maxListeners: undefined,
        _writableState: [Object],
        writable: false,
        allowHalfOpen: false,
        _bytesDispatched: 0,
        _sockname: null,
        _pendingData: null,
        _pendingEncoding: '',
        server: null,
        _server: null,
        _idleNext: null,
        _idlePrev: null,
        _idleTimeout: -1,
        write: [Function: writeAfterFIN],
        [Symbol(asyncId)]: 4541,
        [Symbol(bytesRead)]: 0 },
     stderr: 
      Socket {
        connecting: false,
        _hadError: false,
        _handle: null,
        _parent: null,
        _host: null,
        _readableState: [Object],
        readable: false,
        domain: [Object],
        _events: [Object],
        _eventsCount: 3,
        _maxListeners: undefined,
        _writableState: [Object],
        writable: false,
        allowHalfOpen: false,
        _bytesDispatched: 0,
        _sockname: null,
        _pendingData: null,
        _pendingEncoding: '',
        server: null,
        _server: null,
        _idleNext: null,
        _idlePrev: null,
        _idleTimeout: -1,
        write: [Function: writeAfterFIN],
        [Symbol(asyncId)]: 4542,
        [Symbol(bytesRead)]: 232 },
     stdio: [ [Object], [Object], [Object] ] },
  stdout: '',
  stderr: 'convert-im6.q16: unable to open image `/tmp/PAN_0101197701.jpeg\': No such file or directory @ error/blob.c/OpenBlob/2701.\nconvert-im6.q16: no images defined `/tmp/PAN_01011977-thumb.jpeg\' @ error/convert.c/ConvertImageCommand/3258.\n' }

Error serializing return value: TypeError: Converting circular structure to JSON


Function execution took 9561 ms, finished with status: 'ok'

The problem is with using the option below in gs. Without it the function works, but it doesn't crop the PDF; it just converts the full page to an image.

  //.option('-c "<</PageOffset [ -64.2 40 ]>> setpagedevice"')
  //.option('-c "<</PageOffset [ -308.5 40 ]>> setpagedevice"')

How can I use the above option?

Edit

I tried to terminate -c with -f, but no luck:

$ node index.js
gs command: -dSAFER,-dBATCH,-dNOPAUSE,-dTextAlphaBits=4,-dGraphicsAlphaBits=4,-dDEVICEWIDTHPOINTS=238,-dDEVICEHEIGHTPOINTS=149.5,-dFIXEDMEDIA,-r150,-dDownScaleFactor=2,-sPDFPassword=01011977,-sDEVICE=jpeg,-sOutputFile=/home/jcol/Desktop/gs_offline/functions/output.jpeg,-c <</PageOffset[-64.2 40]>>setpagedevice,-f,/home/jcol/Desktop/gs_offline/functions/pan.pdf
Suceess
GPL Ghostscript 9.20 (2016-09-26)
Copyright (C) 2016 Artifex Software, Inc.  All rights reserved.
This software comes with NO WARRANTY: see the file PUBLIC for details.
Processing pages 1 through 1.
Page 1
Loading NimbusSans-Regular font from %rom%Resource/Font/NimbusSans-Regular... 4244908 2808684 2600016 1250276 3 done. 
jecol
  • 21
  • 7

2 Answers2

0

In the absence of an example file (and ideally the actual command line being sent to Ghostscript) as well as the lack of the back channel output (stdout and stderr) the only observation I can make is that the 'option' you refer to (actually a piece of PostScript programming) introduces PostScript input with the -c switch but does not terminate it with -f. That means anything which follows this on the command line will be treated as more PostScript, which is likely to either lead to an error or a 'hang', awaiting more input.

KenS
  • 30,202
  • 3
  • 34
  • 51
  • hello sir please see the updated question i've updated the code and added logs and errors in it. – jecol Apr 03 '20 at 10:46
  • I'm afraid none of that is the back-channel output from Ghostscript. The 'gs command' in your output can't really be the Ghostscript command, not least because it includes commas which Ghostscript won't understand as part of the command line. I'd suggest you try running Ghostscript directly from the command line. That way you'll easily be able to see what the back channel reports. If you can't reproduce the problem then you can compare what your code produces with what works on the command line. If you can reproduce the problem then you can tell me the command line and give me a file. – KenS Apr 03 '20 at 11:28
  • I moved to ubuntu now and tried all the code locally. now it is returining undefined. please see the OP as i have updated it. – jecol Apr 05 '20 at 10:51
  • That isn't the entire back channel output, there should be a **lot** more. Please don't trim it down to what you think is relevant, quote the entire output. If that's all you are getting then you need to find out how to get the rest. Note that, as I mentioned in my answer, you have not terminated the -c with a -f after the PostScript and before the input filename. This means the input filename will be treated as PostScript and interpreted that way. Unsurprisingly this will indeed give an error, and the error probably is 'undefined' since the filename isn't valid PostScript. – KenS Apr 05 '20 at 11:01
  • Hey Ken I tried terminating the `-c` with `-f` but no luck. This is the log `Processing pages 1 through 1. Page 1 Loading NimbusSans-Regular font from %rom%Resource/Font/NimbusSans-Regular... 4244908 2808684 2600016 1250276 3 done. undefined` – jecol Apr 05 '20 at 12:19
  • Seriously, that's not the entire back channel output from Ghostscript, you're missing most of the error string, and the entire stack dump, both of which are vital information. However it is processing the file so the 'probability' is that the input file is broken, its also possible that its a bug, you are using an old (4 year old) version of Ghostscript and you should update to the current version anyway. Other than that I'd need to see the input PDF file. – KenS Apr 05 '20 at 12:47
  • Sorry but that's the log i am getting and i really don't know how to get the back channel output manually. And for the pdf i just can't throw it here. Can you give me your mail id ? you can use temp-mail if you don't want throw the real one here. oh yeah and for the gs version the latest is 9.26 on ubuntu i tried to update but that's the latest available via `apt-get`. – jecol Apr 05 '20 at 14:17
  • You must be running an old version of Ubuntu, I know for certain that isn't the current version available, though if you are running an LTS it might be. Without the log or the PDF file there's not much I can tell you except that the interpreter has exited with (apparently) an undefined error. That basically means it was asked to interpret something it didn't understand. It 'looks like' (but again without the full log I can't tell) it's started running the PDF file and then got an error. – KenS Apr 05 '20 at 15:11
  • Instead of running exec from node.js try running Ghostscript from the command line using the command you've got and see what happens. If it works then its not Ghostscript. If it doesn't work you'll have the full log. Either way you'll be ahead. – KenS Apr 05 '20 at 15:12
  • Yes it works from the command line ! that's the thing i am trying to say. And i am using LTS 18.04 it and i think the problem would the be the version of gs lambada [9.20] which is being used. – jecol Apr 05 '20 at 16:03
  • That is distinctly possible, that's a very old version. If it works from the command line then I'd pick up a copy of Ghostscript 9.20, there are old binaries which *might* work on your Ubuntu, and there are old source tarballs available, but obviously you'd have to build those. Perhaps you should just contact the Lmbda-ghostscript maintainer and ask them to make a newer version available. Not least because versions prior to 9.50 have some well known security vulnerabilities. – KenS Apr 05 '20 at 18:30
  • I don't think it's possible for me to build the `ghostscript` from source code. I tried yesterday and i have to reinstall the os. new to inux. tried 9.52 from here https://www.archlinux.org/packages/extra/x86_64/ghostscript/ and but don't know for a reason when i check --version it returns 9.20 i have double checked everything. And there is libgs.so file in libs folder but don't know how to use that from node.js ghostscript4js is what i found but seems that it won't fit in my needs as that have to use some environmental variable while installation and that won't work with firebase functions. – jecol Apr 06 '20 at 16:28
  • the library node-gs is last updated 3 years ago and same for the ghostscript. tried the ghostscript 9.5 github.com/mcodina86/lambda-ghostscript but same undefined return with the cropped file(not cropped according to pageoffset as it is not called) and i think the lib doesn't know how to handle on `-c` switch. – jecol Apr 06 '20 at 17:12
  • I really don't know how I can help you here. Ghostscript **does** support the PageOffset page device parameter, all versions for the last 20 years or more do so. You say it works from the command line so clearly there's nothing wrong with Ghostscript itself, its the implementation you are using, or the way you are using it. In the absence of the back channel log I can't even guess as to what's wrong. If you can't build Ghostscript from source, then I think you are out of luck. – KenS Apr 06 '20 at 18:56
  • Is there any way to force get back channel output as in cmdline it is working even with the 9.20 version and not in the node.js version so the error is definatily not of gs. if you have time then can you please take a look at node-gs ? – jecol Apr 06 '20 at 19:31
  • There's no point in me looking at node-gs, I know absolutely nothing about it. You can redirect the stdout output from Ghostscript using -sstdout= as described here https://www.ghostscript.com/doc/9.52/Use.htm#Interaction_related_parameters Ordinarily you can use > and 2> to redirect stdout and stderr to files, I'd guess you can use those as well, depends on the underlying OS. Ghostscript sends its output to either stdout or stderr depending on whether its error messages or not. – KenS Apr 07 '20 at 07:31
0

For Future Readers

And For Newbies like ME

There was some confusion in using libraries in NODEJS as i was new to it.

Firstly, I was using a gs wrapper from some tutorials which was not on NPM, and that was the cause of the error, as for some reason it didn't support commands.

After some Research i came to Node-gs from npm and i checked Node-GS API at their page in NPM and it had dedicated option for command option.

Now why i posted this Answer :

The NODE-GS library is the only one that supports serverless architectures like Firebase Functions. But its error handling is poor, as it only spawns Ghostscript directly from the executable.

For example, if you provide the wrong password for the PDF, the library is just going to put Ghostscript's "Unrecoverable Error" in the error message.

(I know if you are using Ghostscirpt in serverless functions you're smart enough to check if password is correct or not on client side but just for example)

At the time of writing this I found Ghostscript4JS for Node, which uses the C++ API of Ghostscript, but unfortunately this library doesn't support serverless architectures, as it depends on a system-installed Ghostscript; the developer said there are some plans for it.

Check if the library now supported serverless Architecture.

And in the end, the struggle I had to go through as a newbie was finding a portable version of Ghostscript to use with NODE-GS; you can find it here: Ghostscript Releases

For Firebase Functions users: Firebase Functions is built upon Ubuntu Bionic 18.04 (x64), so you have to use the x86_64 version of Ghostscript. The latest at the time of writing is Ghostscript 9.52.

B-Bye :)

jecol
  • 21
  • 7