When I run the script in headless mode, it simply times out on `page.goto(url)`. When I run it with `headless: false` and just let it do its thing, you can see the URL start to load for a moment, then go into a sort of redirect and endless loading.
However, if, while in `headless: false`, I open up a new tab and manually navigate to the URL, then the original tab will load fine. I'm already taking a lot of steps to avoid detection here:
const puppeteer = require('puppeteer-extra');
const StealthPlugin = require('puppeteer-extra-plugin-stealth')
const userAgent = require('user-agents');
// Register the stealth plugin before launching the browser. `use()` is a
// synchronous registration call, so there is nothing to await here.
puppeteer.use(StealthPlugin());
var browser = await puppeteer.launch({ headless: false });
let page = await browser.newPage();
// setViewport() returns a promise: await it so the viewport is applied before
// the following calls run and so a failure is not left as an unhandled rejection.
await page.setViewport({
  width: 1200,
  height: 800,
  deviceScaleFactor: 1,
  hasTouch: false,
  isLandscape: true,
  isMobile: false,
});
// NOTE(review): a fully random user agent may describe a mobile device or a
// different browser than the one actually launched, which would contradict the
// desktop viewport above -- confirm whether the choice should be restricted.
var agent = userAgent.random();
await page.setUserAgent(agent.toString());
await page.setJavaScriptEnabled(true);
// Pass the Webdriver Test: make `navigator.webdriver` read as `false` in each
// new document.
await page.evaluateOnNewDocument(() => {
  const webdriverDescriptor = { get: () => false };
  Object.defineProperty(navigator, 'webdriver', webdriverDescriptor);
});
// Pass the Chrome Test: attach a minimal stand-in `chrome` object to
// `navigator` in each new document.
// NOTE(review): detection scripts commonly inspect `window.chrome` rather than
// `navigator.chrome` -- confirm which property the target check reads.
await page.evaluateOnNewDocument(() => {
  // Only `runtime` is stubbed; extend the stub if the check looks deeper.
  const chromeStub = { runtime: {} };
  window.navigator.chrome = chromeStub;
});
// Pass the Permissions Test: answer 'notifications' queries with the value of
// `Notification.permission`, and forward every other query to the browser's
// own implementation.
await page.evaluateOnNewDocument(() => {
  const { permissions } = window.navigator;
  // Keep the native method bound to the Permissions object. Calling a detached
  // reference without its receiver fails with "Illegal invocation", which
  // would break every non-notification permission query on the page.
  const originalQuery = permissions.query.bind(permissions);
  permissions.query = (parameters) =>
    parameters.name === 'notifications'
      ? Promise.resolve({ state: Notification.permission })
      : originalQuery(parameters);
});
// Pass the Plugins Length Test: expose a non-empty `navigator.plugins`.
await page.evaluateOnNewDocument(() => {
  // The check only needs `length > 0`, so placeholder entries are enough;
  // real plugin objects could be mocked here if a stricter check requires it.
  // A fresh array is produced on every read.
  const fakePlugins = () => [1, 2, 3, 4, 5];
  Object.defineProperty(navigator, 'plugins', { get: fakePlugins });
});
// Pass the Languages Test: report a fixed language list.
await page.evaluateOnNewDocument(() => {
  // Overwrite the `languages` property with a custom getter that returns a
  // fresh array on every read.
  const reportLanguages = () => ['en-US', 'en'];
  Object.defineProperty(navigator, 'languages', { get: reportLanguages });
});
// Open a raw Chrome DevTools Protocol session on this page's target so that
// protocol commands can be sent directly.
const session = await page.target().createCDPSession();
// Enable the protocol's Page domain on that session.
await session.send("Page.enable");
// Request the "active" web lifecycle state for the page.
await session.send("Page.setWebLifecycleState", { state: "active" });
// Bring this tab to the foreground.
await page.bringToFront();
// Navigate to the target; `url` is defined outside this snippet. The
// "networkidle2" option makes goto() wait on network activity settling
// (see the Puppeteer docs for the exact criterion) before resolving.
await page.goto(url, {waitUntil: "networkidle2"} );
Any ideas on how I'm still tipping them off that I'm running Puppeteer, given that the page only loads once I manually open a new tab and type the URL into the address bar? Or is there a way to force a more human-like interaction in the browser that opens the new tab for me, which might allow me to do this headless?
Edit: To be clear, when I say "go into a sort of redirect and endless loading", what happens is that I see a brief flash of the page rendering, and then it goes to a blank white page. No change is visible in the address bar, but the loading indicator seems to show some type of redirection or refreshing. Whether I manually open the new tab before, during, or after the Puppeteer-created tab, as soon as the manual tab begins to load the URL, the Puppeteer-created tab suddenly begins working.