3

I'm trying to scrape YouTube to get information about a video, but the code raises an error that seems to come from the `render()` call in requests_html. The code is below:

from requests_html import AsyncHTMLSession
import pyppdf.patch_pyppeteer
import pyppeteer

import asyncio
# Script purpose: fetch a YouTube watch page through requests_html, render its
# JavaScript, and save the resulting HTML to disk.
if asyncio.get_event_loop().is_running(): # Only patch if needed (i.e. running in Notebook, Spyder, etc)
    import nest_asyncio
    nest_asyncio.apply()
    
url = "https://www.youtube.com/watch?v=3vY2L0ikq8w"

session = AsyncHTMLSession()
# inserting the query and the page number into the URL
# (this snippet only prints the URL; no query/page substitution happens here)
print(url)
# send the request to YouTube
# NOTE(review): top-level `await` presumably relies on a notebook/IPython-style
# environment; it is not valid in a plain module.
response = await session.get(url)
# execute the page's JavaScript
# NOTE(review): this is the call the reported traceback passes through.
# render() drives the event loop itself (run_until_complete) and ends in
# "'coroutine' object has no attribute 'newPage'" on this async session;
# arender() looks like the awaitable counterpart to use here -- confirm.
await response.html.render(sleep=1)
# rename the file (derive the output name from the video id after "v=")
# NOTE(review): `link` is not defined in this snippet (only `url` is), and `re`
# is not imported in the visible code -- verify against the full script.
link_name = re.search('v=(.*)', link).group(1)
# save the HTML file in the dados_brutos folder
with open("./dados_brutos/videos/video_{}.html".format(link_name), 'w+', encoding='utf8') as output:
    output.write(response.html.html)
    
# NOTE(review): `time` is not imported in the visible code.
time.sleep(2)

error description

--------------------------------------------------------------------------- AttributeError Traceback (most recent call last) in async-def-wrapper() 19 # renomear o arquivo 20 link_name = re.search('v=(.*)', link).group(1) ---> 21 # Salvando arquivo HTML na pasta dados_brutos 22 with open("./dados_brutos/videos/video_{}.html".format(link_name), 'w+', encoding='utf8') as output: 23 output.write(response.html.html)

~\AppData\Local\Continuum\anaconda3\lib\site-packages\requests_html.py in render(self, retries, script, wait, scrolldown, sleep, reload, timeout, keep_page) 596 try: 597 --> 598 content, result, page = self.session.loop.run_until_complete(self._async_render(url=self.url, script=script, sleep=sleep, wait=wait, content=self.html, reload=reload, scrolldown=scrolldown, timeout=timeout, keep_page=keep_page)) 599 except TypeError: 600 pass

~\AppData\Local\Continuum\anaconda3\lib\site-packages\nest_asyncio.py in run_until_complete(self, future) 93 raise RuntimeError( 94 'Event loop stopped before Future completed.') ---> 95 return f.result() 96 finally: 97 events._set_running_loop(old_running_loop)

~\AppData\Local\Continuum\anaconda3\lib\asyncio\futures.py in result(self) 176 self.__log_traceback = False 177 if self._exception is not None: --> 178 raise self._exception 179 return self._result 180

~\AppData\Local\Continuum\anaconda3\lib\asyncio\tasks.py in __step(failed resolving arguments) 221 # We use the send method directly, because coroutines 222 # don't have __iter__ and __next__ methods. --> 223 result = coro.send(None) 224 else: 225 result = coro.throw(exc)

~\AppData\Local\Continuum\anaconda3\lib\site-packages\requests_html.py in _async_render(self, url, script, scrolldown, sleep, wait, reload, content, timeout, keep_page) 503 """ Handle page creation and js rendering. Internal use for render/arender methods. """ 504 try: --> 505 page = await self.browser.newPage() 506 507 # Wait before rendering the page, to prevent timeouts.

AttributeError: 'coroutine' object has no attribute 'newPage'

How can I get this code to run?

1 Answer

0

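Replace `render` with `arender`, and make the request through an `AsyncHTMLSession` instance (conventionally named `asession`). `render()` is the synchronous API: on an async session the browser it needs is a coroutine that is never awaited, which is exactly the `'coroutine' object has no attribute 'newPage'` error in your traceback. `arender()` is the awaitable version meant for `AsyncHTMLSession`: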

response = await asession.get(url)
# executando Java-script
await response.html.arender(sleep=1)
Faiza
  • 1
  • 5
  • Your answer could be improved with additional supporting information. Please [edit] to add further details, such as citations or documentation, so that others can confirm that your answer is correct. You can find more information on how to write good answers [in the help center](/help/how-to-answer). – Community Sep 10 '22 at 15:58