Is it possible to execute CrawlSpider using Playwright integration for Scrapy? I am trying the following script to execute a CrawlSpider but it does not scrape anything. It also does not show any error!
import scrapy
from scrapy.linkextractors import LinkExtractor
from scrapy.spiders import CrawlSpider, Rule
class GumtreeCrawlSpider(CrawlSpider):
name = 'gumtree_crawl'
allowed_domains = ['www.gumtree.com']
def start_requests(self):
yield scrapy.Request(
url='https://www.gumtree.com/property-for-sale/london/page',
meta={"playwright": True}
)
return super().start_requests()
rules = (
Rule(LinkExtractor(restrict_xpaths="//div[@class='grid-col-12']/ul[1]/li/article/a"), callback='parse_item', follow=False),
)
async def parse_item(self, response):
yield {
'Title': response.xpath("//div[@class='css-w50tn5 e1pt9h6u11']/h1/text()").get(),
'Price': response.xpath("//h3[@itemprop='price']/text()").get(),
'Add Posted': response.xpath("//dl[@class='css-16xsajr elf7h8q4'][1]/dd/text()").get(),
'Links': response.url
}