Blockquote
I am trying to extract the href link and then i joined the url and then i give the seleniumRequest with the joined url my code works and it crawls the data but at the result it produce the same repetative data the data that are crawled will be repetative
it look like everything is fine and no error but the output is repetative and data are also coming from different product link
############# STACK OVERFLOW PLAESE HELP I'm AN BEGGINER IN SCRAPY WITH SELENIUM ###########
############# I THINK SOMETHING WITH MY PRODUCT PRICE URL #############
############# SOMETHING WRONG WITH URL #############################
#########This is my code
import scrapy
from scrapy.selector import Selector
from scrapy_selenium import SeleniumRequest
from selenium.webdriver.common.keys import Keys
from time import sleep
class AmazonSpider(scrapy.Spider):
name = 'Amazon'
def start_requests(self):
yield SeleniumRequest(
url='https://www.amazon.com',
wait_time=3,
screenshot=True,
callback=self.parse
)
def parse(self, response):
driver = response.meta['driver']
search_input =
driver.find_element_by_xpath("//input[@id='twotabsearchtextbox']")
search_input.send_keys('smartphones')
search_input.send_keys(Keys.ENTER)
html = driver.page_source
response_obj = Selector(text=html)
driver.set_window_size(1920, 1080)
links = response_obj.xpath("//h2[@class='a-size-mini a-spacing-none a-color-base s-line-clamp-2']/a")
for link in links:
product_link = link.xpath(".//@href").get()
absolute_url = response.urljoin(product_link)
yield SeleniumRequest(url=absolute_url, wait_time=8, callback=self.parse_price,
dont_filter=True )
def parse_price(self, response):
driver = response.meta['driver']
htmlbody = driver.page_source
response_object = Selector(text=htmlbody)
driver.set_window_size(1920, 1080)
sleep(5)
name = response_object.xpath("//h1[@class='a-size-large a-spacing-none']/span").get()
yield {
'name': name
}