I am trying to execute this script but I don't know why it is throwing 'Null' and duplicate value at the same time! My goal is to put the necessary value and click the search button, get all the 'href' from the page and collect the data, which is working fine but providing the 'Null' and duplicate value at the same time!. I don't know what I am actually missing here.
import scrapy
from scrapy_selenium import SeleniumRequest
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.wait import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
import time
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager
class RightMove2Spider(scrapy.Spider):
name = 'rightmove2'
start_urls = ["https://www.rightmove.co.uk/property-for-sale/search.html?searchLocation=London&useLocationIdentifier=true&locationIdentifier=REGION%5E87490&buy=For+sale"]
def __init__(self, name=None, **kwargs):
chrome_options = Options()
driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()))
driver.set_window_size(1920, 1080)
driver.get("https://www.rightmove.co.uk/property-for-sale/search.html?searchLocation=London&useLocationIdentifier=true&locationIdentifier=REGION%5E87490&buy=For+sale")
price_range = WebDriverWait(driver, 10).until(EC.element_to_be_clickable((By.XPATH, "(//option[@value='2000000'])[2]")))
price_range.click()
time.sleep(1)
bedroom_range = WebDriverWait(driver, 10).until(EC.element_to_be_clickable((By.XPATH, "(//option[@value='5'])[1]")))
bedroom_range.click()
time.sleep(1)
tick_box = WebDriverWait(driver, 10).until(EC.element_to_be_clickable((By.XPATH, "//span[@class='tickbox--indicator']")))
tick_box.click()
time.sleep(1)
find_properties_btn = WebDriverWait(driver, 10).until(EC.element_to_be_clickable((By.XPATH, "//button[@id='submit']")))
find_properties_btn.click()
time.sleep(3)
self.property_xpath = driver.find_elements(By.XPATH, "//*[@class='l-searchResult is-list']/div/div/div[4]/div[1]/div[2]/a")
# driver.close()
super().__init__(name, **kwargs)
def parse(self, response):
for el in self.property_xpath:
href= el.get_attribute('href')
time.sleep(1)
yield SeleniumRequest(
url=href,
wait_time=3)
yield {
'Title': response.xpath("//h1[@itemprop='streetAddress']/text()").get(),
'Price': response.xpath("//div[@class='_1gfnqJ3Vtd1z40MlC0MzXu']/span/text()").get(),
'Agent Name': response.xpath("//div[@class='RPNfwwZBarvBLs58-mdN8']/a/text()").get(),
'Agent Address': response.xpath("//div[@class='OojFk4MTxFDKIfqreGNt0']/text()").get(),
'Agent Telephone': response.xpath("//a[@class='_3E1fAHUmQ27HFUFIBdrW0u']/text()").get(),
'Added on': response.xpath("//div[@class='_2nk2x6QhNB1UrxdI5KpvaF']/text()").get(),
'Links': response.url
}
for x in range(24, 1008, 24):
abs_url = f'https://www.rightmove.co.uk/property-for-sale/find.html?locationIdentifier=REGION%5E87490&minBedrooms=5&maxPrice=2000000&index={x}&propertyTypes=&includeSSTC=true&mustHave=&dontShow=&furnishTypes=&keywords='
yield SeleniumRequest(
url= abs_url,
callback=self.parse
)
output
{"Title": null, "Price": null, "Agent Name": null, "Agent Address": null, "Agent Telephone": null, "Added on": null, "Links": "https://www.rightmove.co.uk/property-for-sale/search.html?searchLocation=London&useLocationIdentifier=true&locationIdentifier=REGION%5E87490&buy=For+sale"},
{"Title": "Combwell Crescent, Abbey Wood, London", "Price": "£450,000", "Agent Name": "Anthony Martin Estate Agents, Bexleyheath", "Agent Address": "2 Pickford Lane,\r\nBexleyheath,\r\nDA7 4QW", "Agent Telephone": "020 8012 7475", "Added on": "Added on 30/11/2021", "Links": "https://www.rightmove.co.uk/properties/117050312"},
{"Title": null, "Price": null, "Agent Name": null, "Agent Address": null, "Agent Telephone": null, "Added on": null, "Links": "https://www.rightmove.co.uk/property-for-sale/search.html?searchLocation=London&useLocationIdentifier=true&locationIdentifier=REGION%5E87490&buy=For+sale"},
{"Title": null, "Price": null, "Agent Name": null, "Agent Address": null, "Agent Telephone": null, "Added on": null, "Links": "https://www.rightmove.co.uk/property-for-sale/search.html?searchLocation=London&useLocationIdentifier=true&locationIdentifier=REGION%5E87490&buy=For+sale"},
{"Title": "Combwell Crescent, Abbey Wood, London", "Price": "£450,000", "Agent Name": "Anthony Martin Estate Agents, Bexleyheath", "Agent Address": "2 Pickford Lane,\r\nBexleyheath,\r\nDA7 4QW", "Agent Telephone": "020 8012 7475", "Added on": "Added on 30/11/2021", "Links": "https://www.rightmove.co.uk/properties/117050312"},
{"Title": null, "Price": null, "Agent Name": null, "Agent Address": null, "Agent Telephone": null, "Added on": null, "Links": "https://www.rightmove.co.uk/property-for-sale/search.html?searchLocation=London&useLocationIdentifier=true&locationIdentifier=REGION%5E87490&buy=For+sale"},
{"Title": "Combwell Crescent, Abbey Wood, London", "Price": "£450,000", "Agent Name": "Anthony Martin Estate Agents, Bexleyheath", "Agent Address": "2 Pickford Lane,\r\nBexleyheath,\r\nDA7 4QW", "Agent Telephone": "020 8012 7475", "Added on": "Added on 30/11/2021", "Links": "https://www.rightmove.co.uk/properties/117050312"},