I am trying to scrape from the website https://tonaton.com/en/ads/ghana/electronics. There is a "next" button that I want to click and scrape the contents. The problem is the xpath or css selector of that button does not return any value in neither the scrapy shell nor splash and I am stuck. I can't get in to scrape what I need to. Please any help? This is how far I have been able to come but I'm not getting the right results.
# -*- coding: utf-8 -*-
import scrapy import scrapy_selenium from scrapy_selenium import SeleniumRequest
class VisionSpider(scrapy.Spider): name = 'vision'
def start_requests(self):
yield SeleniumRequest(
url= 'https://tonaton.com',
wait_time=3,
screenshot=True,
callback=self.parse
)
def parse(self, response):
businesses = response.xpath(
"//a[@class='link--1t8hM gtm-home-category-link-click']")
for business in businesses:
link = business.xpath(".//@href").get()
category = business.xpath(".//div[2]/p/text()").get()
yield response.follow(url=link, callback=self.parse_business, meta={'business_category': category})
def parse_business(self, response):
category = response.request.meta['business_category']
rows = response.xpath("//a[@class='card-link--3ssYv gtm-ad-item']")
for row in rows:
new_link = row.xpath(".//@href").get()
yield response.follow(url=new_link, callback=self.next_parse, meta={'business_category': category})
next_page = response.xpath("//div[@class = 'action-button--1O8tU']")
if next_page:
button = next_page.click()
yield SeleniumRequest(
url=button,
wait_time=3,
callback=self.parse
)
def next_parse(self, response):
category = response.request.meta['business_category']
lines = response.xpath("//a[@class='member-link--IzDly gtm-visit-shop']")
for line in lines:
next_link = line.xpath(".//@href").get()
yield response.follow(url=next_link, callback=self.another_parse, meta={'business_category': category})
def another_parse(self, response):
category = response.request.meta['business_category']
button = response.xpath("//button[@class = 'contact-section--1qlvP gtm-show-number']").click()
yield response.follow(url=button, callback=self.new_parse, meta={'business_category': category})
def new_parse(self, response):
category = response.request.meta['business_category']
times = response.xpath("//div[@class='info-container--3pMhK']")
for time in times:
name = time.xpath(".//div/span/text()").get()
location = time.xpath(".//div/div/div/span/text()").get()
phone = time.xpath(".//div[3]/div/button/div[2]/div/text()").get()
yield {
'business_category': category,
'business_name': name,
'phone': phone,
'location': location
}