I am new to scrapy and python in general and i am trying to make a scraper that extracts links from a page then edit these links then go through each one of them .. I am using playwright with scrapy.
this is where i am at but for some reason it only scrapes the first link only.
def parse(self, response):
for link in response.css('div.som a::attr(href)'):
yield response.follow(link.get().replace('docs', 'www').replace('com/', 'com/#'),
cookies={'__utms': '265273107'},
meta=dict(
playwright=True,
playwright_include_page=True,
playwright_page_coroutines=[
PageCoroutine('wait_for_selector', 'span#pple_numbers')]
),
callback=self.parse_c)
async def parse_c(self, response):
yield {
'text': response.css('div.pple_numb span::text').getall()