I have a list of about 1,000 websites. When I tried to make Playwright scrape this array with a for loop, it stops after the first link, although it does bring me the data I need for that first link.
/**
 * Promise for a sequential scrape of every Crunchbase URL in the list below.
 *
 * For each URL: open a fresh browser context, navigate, capture the rendered
 * body HTML, load it into cheerio and hand the result to
 * `structureCrunchbaseSummaryData`.
 *
 * The scrape starts as soon as this module is evaluated (async IIFE).
 * A failure on one URL is logged and the loop moves on to the next URL, so the
 * promise only rejects if the browser itself cannot be launched or closed.
 */
const scrapeCrunchbaseWebsite = (async () => {
  const crunchbaseData = [
    "https://www.crunchbase.com/organization/oracle",
    "https://www.crunchbase.com/organization/oracle/company_financials",
  ];
  // Logged after the declaration; logging before it printed `undefined`.
  console.log(crunchbaseData);

  // Launch the browser once for the whole run instead of once per URL.
  const browser = await playwright.firefox.launch({
    headless: true,
  });

  try {
    // Block-scoped loop variable: the previous bare `i` was an implicit global
    // that any helper using its own bare `i` could overwrite mid-loop.
    for (const crunchbaseOrgLink of crunchbaseData) {
      console.log(crunchbaseOrgLink);

      // A new context per URL keeps cookies/storage isolated between pages.
      const context = await browser.newContext();
      try {
        const page = await context.newPage();
        await page.goto(crunchbaseOrgLink);

        // Save the page's rendered HTML and load it into cheerio.
        const html = await page.evaluate(() => document.body.innerHTML);
        const $ = cheerio.load(html);

        await structureCrunchbaseSummaryData($);
      } catch (err) {
        // Report the failing URL and keep going with the rest of the list.
        console.error(`Failed to scrape ${crunchbaseOrgLink}:`, err);
      } finally {
        // Closing the context also closes every page opened in it.
        await context.close();
      }
    }
  } finally {
    // Always release the browser process, even if something above threw.
    await browser.close();
  }
})();
So I just installed @playwright/test because I saw it could work with loops, but it doesn't bring me any data. Instead, the page I get back contains: "Having a problem? Experiencing issues with this page? Please let us know: You can contact us for assistance. You should use Ref ID: 7db8ed32-48c8-11ee-ae51-75486970695a. You can also send us your feedback: ✓ Thank you for the feedback"
const { expect, test, devices } = require('@playwright/test');
const cheerio = require('cheerio');
// Run every test in this file with the 'iPhone 11' device descriptor.
test.use(devices['iPhone 11']);

/**
 * Opens the Oracle organization page on Crunchbase, loads the rendered body
 * HTML into cheerio and logs the trimmed text of the matched <span> elements.
 *
 * Uses the `page` fixture supplied by Playwright Test directly; the fixture is
 * already created inside the test's browser context, so opening a second page
 * and reassigning the parameter is unnecessary.
 *
 * NOTE(review): the test makes no assertions, so it only fails if navigation
 * or evaluation throws.
 */
test('should be titled', async ({ page }) => {
  await page.goto('https://www.crunchbase.com/organization/oracle');

  // Save the page's rendered HTML and parse it with cheerio.
  const html = await page.evaluate(() => document.body.innerHTML);
  const $ = cheerio.load(html);

  const spans = $('span');
  console.log(spans);

  // `.find('span')` only matches <span> elements nested inside another <span>;
  // top-level spans themselves are not included in the result.
  const tableHeader = spans
    .find('span')
    .map((_, element) => $(element).text().trim())
    .toArray();
  console.log(` descripcompanyNametion${tableHeader}`);
});
// Export the Playwright `test` object so other modules can require it from this file.
module.exports = { test };