I'm trying to run this code, the webdriver opens the page but soon after it stops working and I receive and error: AttributeError: 'dict' object has no attribute 'dont_filter'. This is my code:
import scrapy
from scrapy import Spider
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from scrapy.selector import Selector
from scrapy.http import Request
class RentalMarketSpider(Spider):
name = 'rental_market'
allowed_domains = ['home.co.uk']
def start_requests(self):
s=Service('/Users/chrisb/Desktop/Scrape/Home/chromedriver')
self.driver = webdriver.Chrome(service=s)
self.driver.get('https://www.home.co.uk/for_rent/ampthill/current_rents?location=ampthill')
sel = Selector(text=self.driver.page_source)
tot_prop_rent = sel.xpath('.//div[1]/table/tbody/tr[1]/td[2]/text()').extract_first()
last_14_days = sel.xpath('.//div[1]/table/tbody/tr[2]/td[2]/text()').extract_first()
average = sel.xpath('.//div[1]/table/tbody/tr[3]/td[2]/text()').extract_first()
median = sel.xpath('.//div[1]/table/tbody/tr[4]/td[2]/text()').extract_first()
one_b_num_prop = sel.xpath('.//div[3]/table/tbody/tr[2]/td[2]/text()').extract_first()
one_b_average = sel.xpath('.//div[3]/table/tbody/tr[2]/td[3]/text()').extract_first()
yield {
'tot_prop_rent': tot_prop_rent,
'last_14_days': last_14_days,
'average': average,
'median': median,
'one_b_num_prop': one_b_num_prop,
'one_b_average': one_b_average
}
Below is the full error I receive. I looked everywhere but couldn't find a clear answer in order to get rid of this error:
2021-12-23 17:43:26 [twisted] CRITICAL: Unhandled Error
Traceback (most recent call last):
File "/Users/chrisb/opt/anaconda3/lib/python3.8/site-packages/scrapy/commands/crawl.py", line 27, in run
self.crawler_process.start()
File "/Users/chrisb/opt/anaconda3/lib/python3.8/site-packages/scrapy/crawler.py", line 327, in start
reactor.run(installSignalHandlers=False) # blocking call
File "/Users/chrisb/opt/anaconda3/lib/python3.8/site-packages/twisted/internet/base.py", line 1318, in run
self.mainLoop()
File "/Users/chrisb/opt/anaconda3/lib/python3.8/site-packages/twisted/internet/base.py", line 1328, in mainLoop
reactorBaseSelf.runUntilCurrent()
--- <exception caught here> ---
File "/Users/chrisb/opt/anaconda3/lib/python3.8/site-packages/twisted/internet/base.py", line 994, in runUntilCurrent
call.func(*call.args, **call.kw)
File "/Users/chrisb/opt/anaconda3/lib/python3.8/site-packages/scrapy/utils/reactor.py", line 50, in __call__
return self._func(*self._a, **self._kw)
File "/Users/chrisb/opt/anaconda3/lib/python3.8/site-packages/scrapy/core/engine.py", line 137, in _next_request
self.crawl(request, spider)
File "/Users/chrisb/opt/anaconda3/lib/python3.8/site-packages/scrapy/core/engine.py", line 218, in crawl
self.schedule(request, spider)
File "/Users/chrisb/opt/anaconda3/lib/python3.8/site-packages/scrapy/core/engine.py", line 223, in schedule
if not self.slot.scheduler.enqueue_request(request):
File "/Users/chrisb/opt/anaconda3/lib/python3.8/site-packages/scrapy/core/scheduler.py", line 78, in enqueue_request
if not request.dont_filter and self.df.request_seen(request):
builtins.AttributeError: 'dict' object has no attribute 'dont_filter'
2021-12-23 17:43:26 [scrapy.core.engine] INFO: Closing spider (finished)
Any advice would be appreciated. Thanks for your time.