I have a working solution using scrapy_selenium for a site with JavaScript-loaded content. As you can see in the code below, a SeleniumRequest is used when yielding the detail page with parseDetails.
But what can I do when I need the SeleniumRequest already on my main page (and not only on the detail page, as below)?
How can I use the SeleniumRequest in that situation?
import scrapy
from scrapy_selenium import SeleniumRequest
class ZoosSpider(scrapy.Spider):
    """Scrape UK zoo/attraction pages from TripAdvisor.

    Both the listing (main) page and the detail pages are rendered with
    JavaScript, so every request — including the very first one — must go
    through Selenium instead of Scrapy's default downloader.
    """

    name = 'zoos'
    allowed_domains = ['www.tripadvisor.co.uk']
    start_urls = [
        "https://www.tripadvisor.co.uk/Attractions-g186216-Activities-c53-a_allAttractions.true-United_Kingdom.html"
    ]
    # NOTE(review): appears unused in the visible code — presumably meant
    # for de-duplicating already-seen entries; confirm before removing.
    existList = []

    def start_requests(self):
        """Yield SeleniumRequests for the start URLs.

        By default Scrapy turns each entry of ``start_urls`` into a plain
        ``scrapy.Request``, which skips Selenium and therefore misses the
        JavaScript-rendered listing. Overriding ``start_requests`` is the
        standard way to make the *first* page load through Selenium too.
        """
        for url in self.start_urls:
            yield SeleniumRequest(
                url=url,
                wait_time=10,
                callback=self.parse)

    def parse(self, response):
        """Extract each attraction card's link and follow it with Selenium."""
        cards = response.xpath(
            "//section[@data-automation='AppPresentation_SingleFlexCardSection']")
        for card in cards:
            link = response.urljoin(card.xpath(".//a/@href").get())
            yield SeleniumRequest(
                url=link,
                wait_time=10,
                callback=self.parseDetails)

    def parseDetails(self, response):
        """Extract the attraction name (and placeholders) from a detail page."""
        tmpName = response.xpath("//h1[@data-automation='mainH1']/text()").get()
        # Collected but not yet consumed — presumably the external links
        # (website/phone/mail) are to be parsed from these; TODO confirm.
        tmpLink = response.xpath("//div[@class='Lvkmj']/a/@href").getall()
        # Placeholder values until the corresponding fields are extracted.
        tmpURL = tmpTelnr = tmpMail = "N/A"
        yield {
            "Name": tmpName,
            "URL": tmpURL,
        }