0

I'm trying to create an app that will populate a database over time with information from our website. I'm doing this with Cloud Code on a Parse Server hosted at back4app.com. I have the following code set up, but the problem is that it won't scrape the data from the table. I've inspected the element and am confident that I have selected it correctly, but any help would be greatly appreciated. The code runs, but the log shows an empty array and nothing gets added to the database.

const axios = require('axios');
const { JSDOM } = require('jsdom');
const https = require('https');

// Shared HTTPS agent handed to axios for outgoing requests. It is built with
// rejectUnauthorized: false, i.e. TLS certificate verification is switched off.
// NOTE(review): this accepts any server certificate — confirm that is intended.
const agent = new https.Agent({ rejectUnauthorized: false });

/**
 * Download the page at `url` and resolve with the response body.
 *
 * The request is issued with axios and routed through the module-level
 * `agent`.
 *
 * @param {string} url - Address of the page to request.
 * @returns {Promise<*>} The `data` property of the axios response.
 */
async function fetchHTMLContent(url) {
  const requestOptions = { httpsAgent: agent };
  const { data } = await axios.get(url, requestOptions);
  return data;
}

// Id of the table element that holds the job openings on the page.
const JOB_TABLE_ID = "pbl_sched";

// Pairs each stored field name with the index of the <td> it is read from.
// Column 0 is not read.
const JOB_LISTING_COLUMNS = [
  ["ContactName", 1],
  ["ContactNumber", 2],
  ["Description", 3],
  ["ChurchSite", 4],
  ["Salary", 5],
  ["Address", 6],
  ["City", 7],
  ["State", 8],
  ["Zip", 9]
];

// A data row must have at least this many <td> cells (highest index + 1).
const REQUIRED_CELL_COUNT = 10;

/**
 * Convert one table row into a job-listing record.
 *
 * @param {Element} row - A <tr> element from the job table.
 * @returns {Object|null} An object keyed by the field names in
 *   JOB_LISTING_COLUMNS, or null when the row has too few <td> cells.
 */
function parseJobRow(row) {
  const cells = row.querySelectorAll("td");

  // Rows made of <th> cells, spacer rows or colspan rows do not have all
  // columns; reading cells[n].textContent on them would throw.
  if (cells.length < REQUIRED_CELL_COUNT) {
    return null;
  }

  const jobListing = {};
  for (const [fieldName, cellIndex] of JOB_LISTING_COLUMNS) {
    jobListing[fieldName] = cells[cellIndex].textContent.trim();
  }
  return jobListing;
}

/**
 * Collect every job listing found in the job table of `document`.
 *
 * @param {Document} document - DOM document built from the fetched HTML.
 * @returns {Object[]} One record per usable data row; empty when the table
 *   is missing or has no data rows.
 */
function extractJobListings(document) {
  // getElementById is used instead of document.evaluate because XPathResult
  // is not a Node.js global (jsdom only exposes it on its window object), so
  // referencing it bare raises a ReferenceError.
  const table = document.getElementById(JOB_TABLE_ID);
  console.log("Table:", table);

  if (!table) {
    // NOTE(review): if the page fills this table with client-side JavaScript,
    // it will not be present in the HTML fetched here — verify against the
    // raw response.
    console.warn(`No element with id "${JOB_TABLE_ID}" found in the fetched HTML`);
    return [];
  }

  const rows = table.querySelectorAll("tr");
  console.log("Rows:", rows);

  const jobListings = [];
  rows.forEach((row, index) => {
    if (index === 0) return; // Skip header row

    const jobListing = parseJobRow(row);
    if (jobListing === null) {
      console.warn(`Skipping row ${index}: fewer than ${REQUIRED_CELL_COUNT} cells`);
      return;
    }

    jobListings.push(jobListing);
    console.log("Extracted Job Listing:", jobListing);
  });

  return jobListings;
}

/**
 * Cloud job "fetchAndStoreJobListings": fetches the job openings page, reads
 * the rows of the job table and saves each one as a 'JobListings' object.
 *
 * Rejects (so the job is reported as failed) when fetching the page or
 * saving any listing fails.
 */
Parse.Cloud.job("fetchAndStoreJobListings", async (request) => {
  const htmlContent = await fetchHTMLContent("https://www.pepperdine.edu/spiritual-life/church-relations/resources/jobs/openings.htm");

  // Log the HTML content to inspect its structure
  console.log("HTML Content:", htmlContent);

  // Create a virtual DOM from the HTML content
  const { document } = new JSDOM(htmlContent).window;

  const jobListings = extractJobListings(document);
  console.log("Job listings count:", jobListings.length);

  // Store the job listings to the Parse class 'JobListings'
  const JobListings = Parse.Object.extend("JobListings");

  try {
    await Promise.all(jobListings.map((listing) => {
      const jobListing = new JobListings();
      jobListing.set(listing);
      return jobListing.save();
    }));

    console.log("Job listings saved successfully");
  } catch (error) {
    console.error("Error saving job listings:", error);
    // Rethrow so the failure is not silently swallowed and the job does not
    // finish as if everything had been stored.
    throw error;
  }
});
Andreas Veithen
  • 8,868
  • 3
  • 25
  • 28
user717452
  • 33
  • 14
  • 73
  • 149

0 Answers0