getting everything as none in output cannot figure out the issue in the code
Scraping details of top 1000 rated movies on imdb
Link :- https://www.imdb.com/search/title/?count=100&groups=top_1000&sort=user_rating
CODE
# -*- coding: utf-8 -*-
import scrapy
from scrapy.linkextractors import LinkExtractor
from scrapy.spiders import CrawlSpider, Rule
class BestMoviesSpider(CrawlSpider):
name = 'best_movies'
allowed_domains = ['imdb.com']
start_urls = ['https://www.imdb.com/search/title/?count=100&groups=top_1000&sort=user_rating']
rules = (
Rule(LinkExtractor(restrict_xpaths="//h3[@class='lister-item-header']/a "), callback='parse_item', follow=True),
)
def parse_item(self, response):
yield{
'title' : response.xpath("//h1[@class='TitleHeader__TitleText-sc-1wu6n3d-0 cLNRlG']/text()").get(),
'year' : response.xpath("(//li/span[@class='TitleBlockMetaData__ListItemText-sc-12ein40-2 jedhex'])[1]/text()").get(),
'duration' : response.xpath("(//li[@class='ipc-inline-list__item'])[3]/text()").get(),
'rating' : response.xpath("(//span[@class='AggregateRatingButton__RatingScore-sc-1il8omz-1 fhMjqK'])[2]/text()").get(),
'director' : response.xpath("(//a[@class='ipc-metadata-list-item__list-content-item ipc-metadata-list-item__list-content-item--link'])[13]/text()").get(),
'movie_url' : response.url
}