I'm trying to create an app that will populate a database over time with information from our website. I'm doing this with Cloud Code on a Parse server hosted at back4app.com. I have the following code set up, but the problem is that it won't scrape the data from the table. I've inspected the element and am confident that I have selected it correctly, but any help would be greatly appreciated. The code runs, but the log shows an empty array and nothing gets added to the database.
const axios = require('axios');
const { JSDOM } = require('jsdom');
const https = require('https');
// HTTPS agent used for outbound requests made by this script.
// NOTE(review): rejectUnauthorized: false turns off TLS certificate verification,
// so the remote server's identity is not checked — confirm this is intentional.
const agent = new https.Agent({ rejectUnauthorized: false });
/**
 * Fetch a URL with an HTTP GET (via axios, using the shared HTTPS agent)
 * and hand back the response body.
 *
 * @param {string} url - Address of the page to download.
 * @returns {Promise<*>} Resolves to the `data` field of the axios response.
 */
async function fetchHTMLContent(url) {
  const requestOptions = { httpsAgent: agent };
  const { data } = await axios.get(url, requestOptions);
  return data;
}
/**
 * Convert the rows of the listings table into plain job-listing objects.
 *
 * The first row is treated as the header and skipped. Any other row that has
 * too few <td> cells (e.g. a spacer row or a row built from <th> cells) is
 * skipped as well instead of throwing on a missing cell.
 *
 * @param {Element|null} table - The listings table element, or null when it was not found.
 * @returns {Object[]} One object per data row, keyed by the JobListings field names.
 */
function extractJobListings(table) {
  if (!table) {
    return [];
  }

  // Field name -> 0-based <td> index within a row. Column 0 is not stored.
  const columnIndexByField = {
    ContactName: 1,
    ContactNumber: 2,
    Description: 3,
    ChurchSite: 4,
    Salary: 5,
    Address: 6,
    City: 7,
    State: 8,
    Zip: 9,
  };
  const requiredCellCount = Math.max(...Object.values(columnIndexByField)) + 1;

  const listings = [];
  const dataRows = Array.from(table.querySelectorAll("tr")).slice(1); // drop header row
  for (const row of dataRows) {
    const cells = row.querySelectorAll("td");
    if (cells.length < requiredCellCount) {
      continue; // not a complete data row
    }
    const listing = {};
    for (const [field, cellIndex] of Object.entries(columnIndexByField)) {
      listing[field] = cells[cellIndex].textContent.trim();
    }
    listings.push(listing);
    console.log("Extracted Job Listing:", listing);
  }
  return listings;
}

/**
 * Cloud job "fetchAndStoreJobListings": downloads the job-openings page,
 * reads the table with id "pbl_sched", and saves one 'JobListings' object
 * per data row.
 *
 * Save failures are logged, not rethrown, so they do not reject the job.
 *
 * @param {Object} request - Job request supplied by Parse (currently unused).
 */
Parse.Cloud.job("fetchAndStoreJobListings", async (request) => {
  const htmlContent = await fetchHTMLContent("https://www.pepperdine.edu/spiritual-life/church-relations/resources/jobs/openings.htm");

  // Create a virtual DOM from the HTML content
  const { document } = new JSDOM(htmlContent).window;

  // Log the HTML content to inspect its structure
  console.log("HTML Content:", htmlContent);

  // Look the table up by id. The previous XPath lookup referenced XPathResult
  // as a bare global, but in Node it only exists on the jsdom window object.
  const table = document.getElementById("pbl_sched");
  console.log("Table found:", Boolean(table));
  if (!table) {
    // NOTE(review): if the page fills this table in with client-side JavaScript,
    // it will not be present in the HTML returned by a plain HTTP GET — verify
    // against the logged HTML above.
    console.warn('No element with id "pbl_sched" in the fetched HTML; the table may be rendered client-side.');
  } else {
    console.log("Rows:", table.querySelectorAll("tr").length);
  }

  const jobListings = extractJobListings(table);
  console.log("Job listings count:", jobListings.length);
  if (jobListings.length === 0) {
    // Avoid reporting a successful save when nothing was scraped.
    console.warn("No job listings extracted; nothing to save.");
    return;
  }

  // Store the job listings to the Parse class 'JobListings'
  const JobListings = Parse.Object.extend("JobListings");
  try {
    await Promise.all(jobListings.map((listing) => {
      const jobListing = new JobListings();
      jobListing.set(listing);
      return jobListing.save();
    }));
    console.log("Job listings saved successfully");
  } catch (error) {
    console.error("Error saving job listings:", error);
  }
});