I'm new to this, so please bear with me. I made a simple app for scraping some review data that I needed for work. It uses Puppeteer and chalk, and in this form it is fully functional. The code for the working Puppeteer version is as follows:
const puppeteer = require('puppeteer')
const chalk = require('chalk')
/**
 * Scrape review titles from the listing page whose URL is passed as the first
 * command-line argument (process.argv[2]).
 *
 * A headless browser opens the page, installs an in-page routine that logs
 * every review title containing a run of 2-6 digits, and then pages through
 * the review list by clicking the pagination "next" icon. Each in-page
 * console.log is echoed to the terminal in green.
 *
 * The browser is always closed, whether the run finishes or fails.
 *
 * @returns {Promise<void>} Resolves once pagination is exhausted and the
 *   browser has been closed; rejects with the underlying Puppeteer error.
 */
async function scrapeIt() {
  // Plain timer-based delay: unlike page.waitForTimeout, it does not depend on
  // which Puppeteer release is installed.
  const pause = (ms) => new Promise((resolve) => setTimeout(resolve, ms));

  const browser = await puppeteer.launch({
    headless: true
  });
  try {
    const page = await browser.newPage();
    await page.goto(process.argv[2]);
    // NOTE(review): presumably hovering scrolls these sections into view so
    // the page loads them — confirm against the target site.
    await page.hover('#reviews');
    await page.hover('#amenities');

    // Must be registered before the in-page code runs so no output is missed.
    page.on('console', (consoleObj) => {
      if (consoleObj.type() === 'log') {
        console.log(chalk.green(consoleObj.text()));
      }
    });

    // Everything inside this callback runs in the page, not in Node, so it
    // cannot reference any variable from the surrounding function.
    await page.evaluate(() => {
      setTimeout(() => {
        // Log the title of every review currently in the DOM whose title
        // contains 2-6 consecutive digits.
        function everything() {
          // Any character, a forward slash, any character; the title is the
          // text before the first such match.
          const regex = new RegExp(".{1}([\\\/]).");
          const regexMatch = RegExp("\\\d{2,6}");
          document.querySelectorAll('.review__content').forEach(function (el) {
            const uglyTitle = el.textContent.split('Stayed')[0];
            const cleanTitle = uglyTitle.split(regex)[0];
            if (cleanTitle.match(regexMatch)) {
              console.log(cleanTitle + " MATCH");
            }
          });
        }

        everything();

        const nextButton = document.querySelector('#reviews > div > div > div > div > div > div.review-list > div.pagination > button.btn.btn-icon.ButtonIcon.btn-default.btn-sm.pagination__next.btn-icon-circle > span.SVGIcon.SVGIcon--16px.flex-center');
        // A listing with a single page of reviews may have no next button.
        if (nextButton) {
          // Re-scan one second after each page turn so the new reviews have
          // time to render.
          nextButton.onclick = function () {
            setTimeout(() => {
              everything();
            }, 1000);
          };
        }
      }, 1000);
    });

    await pause(3000);
    await page.click('#reviews');
    const reviewCount = await page.$('.reviews-summary__reviews_count_small');
    const value = await reviewCount.evaluate((el) => el.textContent);
    await pause(2000);
    console.log(`Acting on ${value}:`);

    // Re-query the pagination icons on every pass instead of reusing handles
    // from before the first click, and stop as soon as there is nothing left
    // to click.
    // NOTE(review): assumes the site disables or removes the next button on
    // the last page — confirm; otherwise this loop will not terminate.
    for (;;) {
      const icons = await page.$$('div.review-list > div.pagination .SVGIcon');
      const nextIcon = icons[1];
      if (!nextIcon) {
        break;
      }
      const isDisabled = await nextIcon.evaluate((el) => {
        const button = el.closest('button');
        return button !== null && button.disabled;
      });
      if (isDisabled) {
        break;
      }
      await nextIcon.click();
      await pause(1000);
    }
  } finally {
    // Closing the browser also closes every page it owns.
    await browser.close();
  }
}
// Entry point. Report a failed run and exit non-zero instead of leaving the
// returned promise's rejection unhandled.
scrapeIt().catch((err) => {
  console.error(err);
  process.exitCode = 1;
});
My next thought was to make a simple front end for this code. I didn't want to use endpoints because running a server for this task seemed a little much. I figured Electron would be a good choice; my code for it is as follows (I know that inline scripts, nodeIntegration, and generally the way this is implemented are unsafe — it's not intended to ever go beyond personal use):
main.js
const {app, BrowserWindow} = require('electron')
/**
 * Open the application's window: a 400x100 always-on-top window that loads
 * index.html with Node integration enabled and context isolation disabled.
 */
function createWindow() {
  const windowOptions = {
    width: 400,
    height: 100,
    webPreferences: { nodeIntegration: true, contextIsolation: false },
  };

  const win = new BrowserWindow(windowOptions);
  win.setAlwaysOnTop(true, 'screen');
  win.loadFile('index.html');
}
// When the app is activated while no window is open, open a new one.
const reopenWindowIfNoneOpen = () => {
  if (BrowserWindow.getAllWindows().length === 0) {
    createWindow();
  }
};

// Quit once every window is closed, except on darwin (macOS).
const quitUnlessDarwin = () => {
  if (process.platform !== 'darwin') {
    app.quit();
  }
};

// Open the first window once Electron is ready, then start listening for
// 'activate'.
app.whenReady().then(() => {
  createWindow();
  app.on('activate', reopenWindowIfNoneOpen);
});

app.on('window-all-closed', quitUnlessDarwin);
index.html
<!DOCTYPE html>
<html>
  <head>
    <meta charset="UTF-8">
    <!-- CSP reference: https://developer.mozilla.org/en-US/docs/Web/HTTP/CSP -->
    <!-- Same-origin resources only; inline scripts are allowed so the button's onclick attribute can run. -->
    <meta http-equiv="Content-Security-Policy" content="default-src 'self'; script-src 'self' 'unsafe-inline'">
    <meta http-equiv="X-Content-Security-Policy" content="default-src 'self'; script-src 'self' 'unsafe-inline'">
    <title>Review Scraper</title>
    <!-- Provides reviewListing(), which the Set button calls. -->
    <script src="./reviewListing.js"></script>
  </head>
  <body>
    URL: <input id="title">
    <button id="btn" type="button" onclick="reviewListing()">Set</button>
  </body>
</html>
reviewListing.js
const puppeteer = require('puppeteer');
/**
 * Click handler for the Set button: read the URL typed into the #title input,
 * open it in a visible (non-headless) Puppeteer browser, then hover over the
 * #reviews and #amenities sections in turn.
 *
 * The author reports that the first hover is where this stops working when
 * run from Electron, which is why the function goes no further.
 *
 * @returns {Promise<void>}
 */
async function reviewListing() {
  const { value: targetUrl } = document.getElementById('title');
  console.log(targetUrl);

  const browser = await puppeteer.launch({ headless: false });
  const page = await browser.newPage();
  await page.goto(targetUrl);

  for (const selector of ['#reviews', '#amenities']) {
    await page.hover(selector);
  }
}
Puppeteer stops working at this point. It can launch the browser, go to the link provided in the text input, take screenshots, and close the browser, but it seems to have no control over scrolling, clicking, or any of the other built-in functions that work perfectly when it's not launched from an Electron app.
Error thrown in the app's mainWindow (not by Puppeteer):
Passed function is not well-serializable!
I've tried everything I can think of, including different require chains, packaging the application, pointing to a Chrome installation on my machine instead of the bundled Chromium, and some ipcMain and ipcRenderer solutions, to no avail. I've also tried the puppeteer-in-electron package, which, at the time of writing, doesn't even seem to launch the browser.
Any time or insight is greatly appreciated.