I am looking for guidance on best practices with asyncio and aiohttp in Python 3. I have a basic scraper, but I am not sure about two things:
- How to properly implement error handling, specifically around my fetch function (I have put a rough sketch of what I am considering below the code).
- Whether I really need the main function at the end just to wrap my async crawler (see the alternative sketch at the very end).

Here is my code so far. It works, but I would like feedback on the two items above.
```python
import asyncio

from aiohttp import ClientSession
from bs4 import BeautifulSoup

urls = []


async def fetch(url, payload={}):
    # One session per request; returns the raw response body.
    async with ClientSession() as s:
        async with s.get(url, params=payload) as resp:
            content = await resp.read()
            return content


async def get_profile_urls(url, payload):
    # Parse the search page and collect the profile links.
    content = await fetch(url, payload)
    soup = BeautifulSoup(content, 'html.parser')
    soup = soup.find_all(attrs={'class': 'classname'})
    if soup:
        urls.extend([s.find('a')['href'] for s in soup])


async def main():
    # search_ulr and max_page are defined elsewhere in my script.
    tasks = []
    payload = {'page': 0, 'filter': 88}
    for i in range(max_page + 1):
        payload['page'] += 1
        # Pass a copy so each task keeps the page number it was created with.
        tasks.append(get_profile_urls(search_ulr, dict(payload)))
    await asyncio.wait(tasks)


asyncio.run(main())
```
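
For the first point, here is a rough sketch of the direction I was considering: catching aiohttp's ClientError (plus timeouts) inside fetch and retrying a couple of times before giving up. The retry count, the 10-second timeout, and the changed payload=None default are placeholders I added just for this sketch, not something I am committed to:

```python
import asyncio

from aiohttp import ClientError, ClientSession, ClientTimeout


async def fetch(url, payload=None, retries=3):
    # Tentative fetch with basic error handling; the retry count and the
    # 10-second total timeout are arbitrary placeholder values.
    timeout = ClientTimeout(total=10)
    for attempt in range(1, retries + 1):
        try:
            async with ClientSession(timeout=timeout) as s:
                async with s.get(url, params=payload or {}) as resp:
                    resp.raise_for_status()
                    return await resp.read()
        except (ClientError, asyncio.TimeoutError) as exc:
            print(f'attempt {attempt} for {url} failed: {exc}')
            await asyncio.sleep(1)  # crude fixed back-off before retrying
    return None  # caller has to cope with a missing page
```

With this version, get_profile_urls would need an `if content is None: return` guard before parsing. Is putting the try/except inside fetch the right place, or should the error handling live in get_profile_urls instead?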
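
For the second point, the only alternative I have found is keeping a small main coroutine but switching from asyncio.wait to asyncio.gather, since passing bare coroutines to asyncio.wait is deprecated in newer Python versions. This sketch reuses the same search_ulr and max_page names from my script above:

```python
import asyncio


async def main():
    # One payload per page (pages 1 through max_page + 1, matching my loop),
    # building a fresh dict per page instead of mutating a shared one.
    payloads = [{'page': page, 'filter': 88} for page in range(1, max_page + 2)]
    tasks = [get_profile_urls(search_ulr, payload) for payload in payloads]
    # gather propagates exceptions and preserves result order.
    await asyncio.gather(*tasks)


asyncio.run(main())
```

Is it still considered good practice to have a main coroutine like this just so asyncio.run has a single entry point, or is there a cleaner pattern?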