I'm trying to scrape a website to get the coupon values applied to a product, but the site keeps blocking me. I've already tried using a proxy with an IP from my country, changing the headers, looking for some kind of "id" required for access (such as AWS auth tokens), running with and without an incognito page, and setting headless to both true and false, but nothing works. Here is what I tried:
# Command-line switches handed to the browser at launch. The final entry
# points the browser at the proxy configured on this object.
proxy_endpoint = self.PROXY_HOST + ':' + str(self.PROXY_PORT)
args = [
    '--start-maximized',
    '--disable-extensions',
    '--hide-scrollbars',
    '--disable-bundled-ppapi-flash',
    '--mute-audio',
    '--no-sandbox',
    '--disable-setuid-sandbox',
    '--disable-gpu',
    '--proxy-server=' + proxy_endpoint,
]
#browser = await launch(headless=False, ignoreHTTPSErrors=True, userDataDir='./tmp', options={'args': args})
browser = await launch(headless=False, options={'args': args})
browser_context = await browser.createIncognitoBrowserContext()
page = await browser_context.newPage()
await page.setUserAgent('Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.77 Safari/537.36')
await page.setExtraHTTPHeaders({
"X-Frame-Options": "GOFORIT"
})
await page.setViewport({
'width': 1366,
'height': 768
})
await page.authenticate({'username': self.PROXY_USER, 'password': self.PROXY_PASS})
await page.setRequestInterception(True)
async def intercept(request):
    """Continue an intercepted request, adjusting headers for "origem=PD" URLs.

    Requests whose URL contains ``origem=PD`` are continued with the header
    overrides below layered on top of the headers the request already
    carries. Every other request is continued unchanged.

    Args:
        request: The intercepted request. It must expose ``url``, ``headers``
            (a mapping of header name to value) and an awaitable
            ``continue_``.
    """
    if "origem=PD" not in request.url:
        await request.continue_()
        return

    # NOTE(review): this User-Agent differs from the one passed to
    # page.setUserAgent(); confirm the mismatch is intended.
    header_overrides = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.98 Safari/537.36',
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
        'Accept-Language': 'en-US,en;q=0.5',
        'Accept-Encoding': 'gzip',
        'DNT': '1',
        'Connection': 'close',
    }

    # A "headers" override replaces the request's whole header set, so start
    # from the existing headers and only swap the ones we mean to change.
    # Names are compared case-insensitively to avoid sending both
    # "user-agent" and "User-Agent".
    overridden = {name.lower() for name in header_overrides}
    merged_headers = {
        name: value
        for name, value in request.headers.items()
        if name.lower() not in overridden
    }
    merged_headers.update(header_overrides)

    await request.continue_({"headers": merged_headers})
# intercept links
page.on('request', lambda req: asyncio.ensure_future(intercept(req)))
# wait for navigation
await asyncio.wait([page.goto(url, {'waitUntil': 'networkidle2'}), page.waitForNavigation()])
# get content
content = await page.content()