-1

I have a list of about 1000 websites. When I tried to make Playwright scrape this array with a for loop, it stops after the first link, although it does bring me the data I need.

/**
 * Promise for one scraping run over the Crunchbase URLs listed below.
 *
 * For each URL the run opens the page in headless Firefox (Playwright), reads
 * the rendered `document.body.innerHTML`, loads that HTML into Cheerio and
 * hands the Cheerio root to `structureCrunchbaseSummaryData`.
 *
 * A failure on one URL is logged with `console.error` and the loop continues
 * with the next URL, so a single bad page does not abort the rest of the list.
 * The browser is closed when the run ends, whether it succeeded or threw.
 */
const scrapeCrunchbaseWebsite = (async () => {
  const crunchbaseData = [
    "https://www.crunchbase.com/organization/oracle",
    "https://www.crunchbase.com/organization/oracle/company_financials",
  ];
  // Must come after the declaration so the list (not `undefined`) is printed.
  console.log(crunchbaseData);

  // One browser serves the whole run; each URL gets its own fresh context.
  const browser = await playwright.firefox.launch({
    headless: true,
  });

  try {
    for (const crunchbaseOrgLink of crunchbaseData) {
      console.log(crunchbaseOrgLink);
      const context = await browser.newContext();

      try {
        const page = await context.newPage();
        await page.goto(crunchbaseOrgLink);

        // Snapshot the rendered DOM and parse it with Cheerio's jQuery-like API.
        const html = await page.evaluate(() => document.body.innerHTML);
        const $ = cheerio.load(html);

        await structureCrunchbaseSummaryData($);
      } catch (err) {
        // Report and move on so the remaining URLs are still processed.
        console.error(`Failed to scrape ${crunchbaseOrgLink}:`, err);
      } finally {
        // Closing the context also closes the page opened inside it.
        await context.close();
      }
    }
  } finally {
    await browser.close();
  }
})();

So I just installed @playwright/test because I saw it could work with loops, but it doesn't bring me any data. Instead, the page returns this text: "Having a problem? Experiencing issues with this page? Please let us know: You can contact us for assistance. You should use Ref ID: 7db8ed32-48c8-11ee-ae51-75486970695a. You can also send us your feedback: ✓ Thank you for the feedback"

const { expect, test, devices } = require('@playwright/test');
const cheerio = require('cheerio');

// Apply Playwright's built-in 'iPhone 11' device descriptor to the tests in this file.
test.use(devices['iPhone 11']);

/**
 * Opens the Oracle organization page on Crunchbase, parses the rendered body
 * HTML with Cheerio and logs the `<span>` elements plus the trimmed text of
 * the spans nested inside them.
 *
 * NOTE(review): the test is titled "should be titled" but makes no assertion;
 * it only logs. Add an `expect(...)` once the intended check is known.
 */
test('should be titled', async ({ page }) => {
  // Use the `page` fixture directly instead of overwriting it with a second
  // page from the same context.
  await page.goto('https://www.crunchbase.com/organization/oracle');

  // Snapshot the rendered DOM and parse it with Cheerio's jQuery-like API.
  const html = await page.evaluate(() => document.body.innerHTML);
  const $ = cheerio.load(html);

  const fields = $('span');
  console.log(fields);

  // NOTE(review): `.find('span')` only matches spans that are descendants of
  // another span. If every span's text is wanted, map over `fields` directly.
  const nestedSpanTexts = fields
    .find('span')
    .map((_, el) => $(el).text().trim())
    .toArray();
  console.log(` descripcompanyNametion${nestedSpanTexts}`);
});

// Export the Playwright `test` object so other modules can require it from here.
module.exports = { test };
Steven
  • 15
  • 4

0 Answers0