I am trying to convert documents using the Bluemix Document Conversion service with a Node.js application. I am getting nothing but errors in my app, but the test document I'm using converts fine using the demo page. Below is a minimal app that demonstrates the problem (Note that, while this app is converting a PDF from disk, the "real" app can't do that, hence the Buffer object).
'use strict';
var fs = require('fs');
var DocumentConversionV1 = require('watson-developer-cloud/document-conversion/v1');
var bluemix=require('./my_bluemix');
var extend=require('util')._extend; //Node.js' built-in object extend function
var dcCredentials = extend({
url: '<url>',
version: 'v1',
username: '<username>',
password: '<password>'
}, bluemix.getServiceCreds('document_conversion')); // VCAP_SERVICES
var document_conversion = new DocumentConversionV1(dcCredentials);
var contents = fs.readFileSync('./testdoc.pdf', 'utf8');
var parms={
file: new Buffer(contents,'utf8'),
conversion_target: 'ANSWER_UNITS', // (JSON) ANSWER_UNITS, NORMALIZED_HTML, or NORMALIZED_TEXT
content_type:'application/pdf',
contentType:'application/pdf', //don't know which of these two works, seems to be inconsistent so I include both
html_to_answer_units: {selectors: [ 'h1', 'h2','h3', 'h4']},
};
console.log('First 100 chars of file:\n******************\n'+contents.substr(0,100)+'\n******************\n');
document_conversion.convert(parms, function(err,answerUnits)
{
if (!err)
console.log('Returned '+answerUnits.length);
else
console.log('Error: '+JSON.stringify(err));
});
The results from running this program against the test PDF (782K) is:
$ node test.js
[DocumentConversion] WARNING: No version_date specified. Using a (possibly old) default. e.g. watson.document_conversion({ version_date: "2015-12-15" })
[DocumentConversion] WARNING: No version_date specified. Using a (possibly old) default. e.g. watson.document_conversion({ version_date: "2015-12-15" })
First 100 chars of file:
******************
%PDF-1.5
%����
1 0 obj
<</Type/Catalog/Pages 2 0 R/Lang(en-US) /StructTreeRoot 105 0 R/MarkInfo<<
******************
Error: {"code":400,"error":"Could not push back 82801 bytes in order to reparse stream. Try increasing push back buffer using system property org.apache.pdfbox.baseParser.pushBackSize"}
$
Can someone tell me
- How to get rid of the warning messages
- Why the document is not getting converted
- How do I "increase the push back buffer"
Other documents give different errors, but I'm hoping if I can make this one work then the other errors will go away too.