0

I am using puppeteer-clustor and imagemagick (convert) / xwd command to take screenshot of complete desktop.

Would need browser with viewable part of page and browser navigation buttons and URL. I could get screenshot most of the times, however it does fail other times.

Error message is the tab is closed screenshot is done. Please suggest what is that I am doing wrong.

Code runs on linux with a X running on DISPLAY:0.3. I can see

Below is the code which I have I tried blockingWait and also

const {
  Cluster
} = require('puppeteer-cluster');
const execSync = require('child_process').execSync;

process.env['DISPLAY'] = ':0.3';
let i = 0;

function wait(time) {
  return new Promise((resolve) => setTimeout(resolve, time));
}

function blockingWait(seconds) {
  //simple blocking technique (wait...)
  var waitTill = new Date(new Date().getTime() + seconds * 1000);
  while (waitTill > new Date()) {}
}

function getscreenshot(url, page) {
  page.bringToFront(); // Get the tab to focus 
  wait(200);
  i = i + 1; // For now get screenshot as number will add image named based on URL 
  path = i + '.jpg';
  var r = execSync('import -window root ' + path);
  console.log('Taken screenshot: ' + path);
  console.log(url);
  blockingWait(1);
}

(async () => {
  // Create a cluster with 6 workers or 6 tabs which loads all the url
  const cluster = await Cluster.launch({
    concurrency: Cluster.CONCURRENCY_PAGE,
    maxConcurrency: 6,
    timeout: 120000,
    puppeteerOptions: {
      executablePath: 'google-chrome-stable',
      args: [
        '--ignore-certificate-errors',
        '--no-sandbox',
        '--incognito',
        '--disable-infobars',
        '--disable-setuid-sandbox',
        '--window-size=1600,1200',
        '--start-maximized',
        '--disable-gpu'
      ],
      headless: false, //headless:false so we can watch the browser as it works
    },
  });
  console.log('cluster launched');

  // We don't define a task and instead use own functions
  const screenshot = async ({
    page,
    data: url
  }) => {
    console.log('screenshot entered ');
    await page.setExtraHTTPHeaders({
      'CUSTOMER-ID': "66840"
    }, ); // use same customer id as header
    await page.setViewport({
      width: 1600,
      height: 1200
    });
    await page.setUserAgent('Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3419.0 Safari/537.36');
    await page.goto(url, {
      waitUntil: 'domcontentloaded'
    }, {
      waitUntil: 'networkidle0'
    }, {
      waitUntil: 'load'
    });
    // Since we wait the page to fully load

    await page.waitForResponse(response => response.ok()) // ok page is ready .. will deal here for other HTTP error beside 200, 404,500 etc 

    await page.waitForNavigation({
      waitUntil: 'domcontentloaded'
    }, {
      waitUntil: 'networkidle0'
    }, ); // Wait for page to load before screenshot
    await page.bringToFront(); // Get the tab to focus 
    wait(100); // Blocking wait
    console.log('Waiting 5 sec');
    blockingWait(5); // different kind of wait
    getscreenshot(url, page);
    console.log('screenshot exited');
  };

  const extractTitle = async ({
    page,
    data: url
  }) => {
    console.log('scrapelinks entered');
    await page.setExtraHTTPHeaders({
      'CUSTOMER-ID': "66840"
    }, );
    await page.setViewport({
      width: 1600,
      height: 1200
    });
    await page.goto(url);
    const pageTitle = await page.evaluate(() => document.title); // will later used to confirm the page matches with client details.
    // get all Links on the page
    const hrefs = await page.$$eval('a', hrefs => hrefs.map((a) => {
      return {
        href: a.href,
        text: a.textContent,
      };
    }));
    // get 1st links matching text or link value having bioanalyzer-systems/instrument-2100.xhtml
    for (let postUrl of hrefs) {
      if (postUrl.text.indexOf("Client-s") > -1) {
        cluster.execute(postUrl.href, screenshot); // add this link also to queue
      } else if (postUrl.href.indexOf("bioanalyzer-systems/instrument-2100.xhtml") > -1) {
        cluster.execute(postUrl.href, screenshot); // add this url to queue
        break;
      }
    }
    console.log('scrapelinks exited');
  };

  // Make screenshots
  cluster.execute('http://www.internal-site.int/en/product/66840?product=NEW&CodeList=bio&Id=66840', screenshot);
  cluster.execute('http://www.internal-site.int/en/product/66840?product=USED&CodeList=nonbio&Id=66840', screenshot);

  // But also do some other stuff
  cluster.execute('http://www.internal-site.int/en/product/66840?product=NEW&CodeList=bio&Id=66840', extractTitle);
  cluster.execute('http://www.internal-site.int/en/product/66840?product=USED&CodeList=nonbio&Id=66840', extractTitle);

  await cluster.idle();
  await cluster.close();
})();```

I expect output to take screenshot once the page or tab load is completed.
marc_s
  • 732,580
  • 175
  • 1,330
  • 1,459
Chakra
  • 66
  • 1
  • 7

1 Answers1

1

The page is being closed as soon as the function is finished executing (or the Promise is resolved). You are not using await to wait for the asynchronous action to finish.

For example, in your screenshot function, there is the following code:

wait(100);
console.log('Waiting 5 sec');
blockingWait(5);
getscreenshot(url, page);
console.log('screenshot exited');

The first line calls the wait function (which is async), but as you are not awaiting it, the function will be executed in the background and Node.js will continue to execute your script.

The blockingWait is not the JavaScript-like way to write code. This completely blocks the execution.

The getscreenshot function should again be async so that you can await it. Also, some of the puppeteer function calls should have await in front of them (e.g. page.bringToFront) to wait until they are finished.

In general, you should check out the concept of async/await and Promises to understand where and why you should be using these keywords.

Thomas Dondorf
  • 23,416
  • 6
  • 84
  • 105
  • I am not sure how to resolve this. I have used Promise as suggested in another post. However it does not work . await page.bringToFront(); and then taking screenshot. But this does not help. Something like ``` await Promise.all([ page.bringToFront(), execSync('import -window root ' + path +'.jpg'), screenshot(page,url), console.log(`closing page: ${url}`), page.close(), ]); ``` .. Any suggestion how to make page come to front .. wait for page to display fully and then take a screenshot using import (imagemagick utility). – Chakra Jun 27 '19 at 23:02