If I use await page.waitFor(9000), or some other hard-coded wait,
my function waits until the page has finished loading.
However, when I rely only on await page.goto(url, {'waitUntil': 'networkidle0'}),
the function continues before the entire page has loaded, so the script fails.
Here is the entire code:
import requests
from bs4 import BeautifulSoup
import time
import os
import pyppeteer
from pyppeteer import launch
import asyncio
import subprocess
# Directory holding the scraped user-agent lists, located next to this script.
# Built with os.path.join on the absolute script directory so the result does
# not collapse to a root-anchored '\data\agents' when __file__ has no
# directory component.
AGENT_DIR = os.path.join(
    os.path.dirname(os.path.abspath(__file__)), 'data', 'agents'
)
# Text file intended to hold the most recently scraped list.
SAVE_FILE = os.path.join(AGENT_DIR, 'latest.txt')
# Page that publishes the most common user agents.
URL = 'https://techblog.willshouse.com/2012/01/03/most-common-user-agents/'
def get_latest_agents():
    """Scrape the most common user agents from ``URL`` and print them.

    Launches a visible (non-headless) browser through pyppeteer, loads
    ``URL``, waits for the page to finish rendering, and prints the text of
    the first element matching the ``.get-the-list`` CSS selector.

    NOTE(review): the earlier docstring said the list is saved to
    ``SAVE_FILE``, but nothing here writes that file -- confirm whether
    saving is still intended.

    Raises:
        IndexError: if the page contains no ``.get-the-list`` element.
    """
    async def scrape():
        browser = await launch(headless=False)
        try:
            page = await browser.newPage()
            await page.goto(URL, {'waitUntil': 'networkidle0'})
            # 'networkidle0' alone was observed to return before the list is
            # rendered, so a fixed 9000 ms delay is kept as a safety margin.
            await page.waitFor(9000)
            content = await page.content()
            soup = BeautifulSoup(content, 'html.parser')
            agents = soup.select('.get-the-list')[0].text
            print(agents)
        finally:
            # Always shut the browser down, even when navigation or parsing
            # fails, so no orphaned Chrome process is left behind.
            await browser.close()

    loop = asyncio.get_event_loop()
    loop.run_until_complete(scrape())
if __name__ == '__main__':
    # pyppeteer does not always shut Chrome down cleanly, so force-kill any
    # leftover chrome.exe processes before starting a new scrape.
    kill_chrome_cmd = ['taskkill', '/F', '/im', 'chrome.exe']
    subprocess.call(kill_chrome_cmd)
    get_latest_agents()
Thank you.