Puppeteer to convert html to pdf using Nodejs in Durable functions(fan out fan in)

Question

I'm working on a small project to convert a large XML file into several formatted PDF documents. The large XML contains multiple smaller XML documents that share a similar format, so I'm using a single HTML template for printing all the documents. After producing all the PDF documents, I also need to produce a metadata file with some basic info on each document that was printed.

I thought the fan-out/fan-in pattern of Durable Functions would be a perfect fit for my use case. I'm working with Node.js. I set up all my code and it seems to be working fine locally. The orchestration function looks like the one below.

const df = require("durable-functions");

module.exports = df.orchestrator(function* (context) {
    var xmldata = yield context.df.callActivity("DurablegetblobJS1","");
    var tasks = [];
    for (file of xmldata) {
        tasks.push(context.df.callActivity("Durableactivityjs2", file));
    }
    const outputs = yield context.df.Task.all(tasks);
    var finalout = "";
    for (out of outputs){
        console.log('I am done1 :' + out );
        finalout = finalout + out;
    }
    return finalout;
});

DurablegetblobJS1 : Fetches the entire XML and splits it into multiple smaller XMLs (one per document).

Durableactivityjs2 : Fetches the HTML template, extracts the different values from the individual XML, applies them to the HTML, and finally writes the PDF to Azure Storage. It returns the name of the PDF document that was written, which is used to create the metadata file. The code for this is below.

var fs = require('fs');
var xml2js       = require('xml2js');
var html_to_pdf = require('html-pdf-node');
var parser       = new xml2js.Parser();

module.exports = async function (context) {
    //console.log("Hello from activity :")
    var xmldict = {}
    var xmltext = context.bindings.name;
    
    //Extract the nodes and attributes
    metadata(xmltext,xmldict);
    report(xmltext,xmldict);
    context.log(xmldict)
    
    const { BlobServiceClient } = require("@azure/storage-blob");
    // Load the .env file if it exists
    require("dotenv").config();
    const AZURE_STORAGE_CONNECTION_STRING = process.env.STORAGE_CONNECTION_STRING || "";
    const blobServiceClient = BlobServiceClient.fromConnectionString(
        AZURE_STORAGE_CONNECTION_STRING
    );
    var containerClient = blobServiceClient.getContainerClient('test');
    var blobname = 'comb_template.html';
    var blockBlobClient = containerClient.getBlockBlobClient(blobname);
    var downloadBlockBlobResponse = await blockBlobClient.download(0);
    var html_template = await streamToText(downloadBlockBlobResponse.readableStreamBody);

    let options = { format: 'A4'};
    let file = { content: html_template};
    const x = await writepdf1(file, options,blobServiceClient,xmldict);
    console.log("Written Blob PDF");
    return x;
   
};

/**
 * Renders `file` to a PDF and uploads it to the 'test2' container.
 *
 * The blob name is built from the 'OU', 'ReportName' and 'DocumentID' entries
 * of `xmldict` as `<OU>/<ReportName>/<OU>_<ReportName>_<DocumentID>_.pdf`.
 *
 * @param {object} file - Input passed through to `html_to_pdf.generatePdf`.
 * @param {object} options - Options passed through to `generatePdf`.
 * @param {object} blobServiceClient - Client used to reach the target container.
 * @param {object} xmldict - Dictionary supplying the blob-name parts.
 * @returns {Promise<string>} The name of the uploaded blob.
 */
async function writepdf1(file, options, blobServiceClient, xmldict) {
    const pdfBuffer = await html_to_pdf.generatePdf(file, options);
    const targetContainer = blobServiceClient.getContainerClient('test2');

    const folder = `${xmldict['OU']}/${xmldict['ReportName']}`;
    const targetblob = `${folder}/${xmldict['OU']}_${xmldict['ReportName']}_${xmldict['DocumentID']}_.pdf`;
    console.log(`Blob name :${targetblob}`);

    await targetContainer
        .getBlockBlobClient(targetblob)
        .upload(pdfBuffer, pdfBuffer.length);
    return targetblob;
}

/**
 * Drains a readable stream and returns its contents as one string.
 *
 * The stream is switched to UTF-8 decoding before it is consumed.
 *
 * @param {object} readable - Stream exposing `setEncoding` and async iteration.
 * @returns {Promise<string>} All chunks joined in arrival order.
 */
async function streamToText(readable) {
    readable.setEncoding('utf8');
    const pieces = [];
    for await (const piece of readable) {
        pieces.push(piece);
    }
    return pieces.join('');
}

/**
 * Copies the MetaData fields of the first DocumentRequest into `xmldict`.
 *
 * Each child of HPDPSMsg > DocumentRequest[0] > MetaData[0] becomes a key of
 * `xmldict`, holding the first element of that child's value.
 *
 * @param {string} xmltext - XML text handed to `parser.parseString`.
 * @param {object} xmldict - Dictionary to fill; mutated in place.
 * @throws {Error} The parser's error when parsing fails, or a descriptive
 *     error when the MetaData element cannot be found.
 */
function metadata(xmltext, xmldict) {
    let parseError = null;
    let parsed;
    // Only record the outcome inside the callback and act on it afterwards,
    // so a failure surfaces to the caller rather than inside the parser.
    // Assumes the parser invokes the callback before parseString returns,
    // which the caller already relies on by reading xmldict immediately.
    parser.parseString(xmltext, function (err, result) {
        parseError = err;
        parsed = result;
    });
    if (parseError) {
        throw parseError;
    }

    const message = parsed && parsed['HPDPSMsg'];
    const request = message && message['DocumentRequest'] && message['DocumentRequest'][0];
    const fields = request && request['MetaData'] && request['MetaData'][0];
    if (fields == null) {
        throw new Error('metadata: HPDPSMsg/DocumentRequest/MetaData not found in parsed XML');
    }

    for (const [key, value] of Object.entries(fields)) {
        xmldict[key] = value[0];
    }
}

/**
 * Copies the attributes of the first Report element into `xmldict`.
 *
 * Each entry of HPDPSMsg > DocumentRequest[0] > Report[0] > '$' is assigned
 * to `xmldict` under the same key.
 *
 * @param {string} xmltext - XML text handed to `parser.parseString`.
 * @param {object} xmldict - Dictionary to fill; mutated in place.
 * @throws {Error} The parser's error when parsing fails, or a descriptive
 *     error when the Report attributes cannot be found.
 */
function report(xmltext, xmldict) {
    let parseError = null;
    let parsed;
    // Only record the outcome inside the callback and act on it afterwards,
    // so a failure surfaces to the caller rather than inside the parser.
    // Assumes the parser invokes the callback before parseString returns,
    // which the caller already relies on by reading xmldict immediately.
    parser.parseString(xmltext, function (err, result) {
        parseError = err;
        parsed = result;
    });
    if (parseError) {
        throw parseError;
    }

    const message = parsed && parsed['HPDPSMsg'];
    const request = message && message['DocumentRequest'] && message['DocumentRequest'][0];
    const reportNode = request && request['Report'] && request['Report'][0];
    const attributes = reportNode && reportNode['$'];
    if (attributes == null) {
        throw new Error('report: HPDPSMsg/DocumentRequest/Report attributes not found in parsed XML');
    }

    for (const [key, value] of Object.entries(attributes)) {
        xmldict[key] = value;
    }
}

However, when I deploy the entire project to an Azure Premium function app (EP1 - Windows), I see some errors in App Insights when I try to execute my function, and the PDFs are never generated.

Activity function 'Durableactivityjs2' failed: Could not find browser revision 818858. Run "PUPPETEER_PRODUCT=firefox npm install" or "PUPPETEER_PRODUCT=firefox yarn install" to download a supported Firefox browser binary

I'm a bit clueless how I'm supposed to resolve this. Any help or suggestions would be appreciated.

Puppeteer to convert html to pdf using Nodejs in Durable functions(fan out fan in)

0 Answers