According to https://github.com/GoogleChrome/puppeteer/issues/628, I should be able to get all links from < a href="xyz" > with this single line:
const hrefs = await page.$$eval('a', a => a.href);
But when I try a simple:
console.log(hrefs)
I only get:
http://example.de/index.html
... as output which means that it could only find 1 link? But the page definitely has 12 links in the source code / DOM. Why does it fail to find them all?
Minimal example:
'use strict';
const puppeteer = require('puppeteer');
crawlPage();
function crawlPage() {
(async () => {
const args = [
"--disable-setuid-sandbox",
"--no-sandbox",
"--blink-settings=imagesEnabled=false",
];
const options = {
args,
headless: true,
ignoreHTTPSErrors: true,
};
const browser = await puppeteer.launch(options);
const page = await browser.newPage();
await page.goto("http://example.de", {
waitUntil: 'networkidle2',
timeout: 30000
});
const hrefs = await page.$eval('a', a => a.href);
console.log(hrefs);
await page.close();
await browser.close();
})().catch((error) => {
console.error(error);
});;
}