0
import scrapy
import csv
from series.items import SeriesItem

class EpisodeScraperSpider(scrapy.Spider):
    """Scrape episode listings from IMDb season pages.

    The spider crawls one season page when ``id`` is supplied, otherwise
    it builds its start URLs from a CSV file with ``id`` and ``series``
    columns.

    Spider arguments:
        id: IMDb title id placed in the URL path. When omitted, the CSV
            file is read instead.
        series: value placed in the ``season`` query parameter. Only used
            together with ``id``.
        csv_path: optional path of the CSV file to read when ``id`` is not
            given. Defaults to ``series_episode.csv``, which is resolved
            against the process working directory; pass an absolute path
            when the working directory is not under your control (for
            example when the spider is launched by a daemon).
    """

    name = "episode_scraper"
    allowed_domains = ["imdb.com"]
    start_urls = []

    # Season page URL; filled with (title id, season) in that order.
    url_template = 'http://www.imdb.com/title/{!s}/episodes?season={!s}'
    # CSV consulted when no ``id`` argument is passed.
    default_csv_path = 'series_episode.csv'

    def __init__(self, id=None, series=None, *args, **kwargs):
        """Build the list of start URLs from the arguments or the CSV file.

        Raises:
            IOError / OSError: if the CSV file cannot be opened.
            KeyError: if a CSV row lacks the ``id`` or ``series`` column.
        """
        # Taken out of kwargs so the positional signature stays unchanged.
        csv_path = kwargs.pop('csv_path', self.default_csv_path)
        super(EpisodeScraperSpider, self).__init__(*args, **kwargs)
        self.csv_path = csv_path

        # Always bind a fresh list on the instance: appending to the
        # class-level ``start_urls`` would leak URLs between instances.
        if id is not None:
            self.start_urls = [self.url_template.format(id, series)]
        else:
            with open(csv_path) as f:
                self.start_urls = [
                    self.url_template.format(row["id"], row["series"])
                    for row in csv.DictReader(f)
                ]

    def parse(self, response):
        """Yield one ``SeriesItem`` per episode entry found on the page.

        Pages without a series heading are logged and skipped.
        """
        headings = response.xpath('//h3/a/text()').extract()
        if not headings:
            self.log('No series title found on {!s}; skipping page'.format(response.url))
            return
        title = headings[0]

        for episode in response.xpath('//div[contains(@class, "list_item")]'):
            item = SeriesItem()

            item['series_episode'] = episode.xpath('div/a/div[contains(@data-const,"tt")]/div/text()').extract()
            # NOTE(review): extract() returns a list, so the episode part of
            # the title is rendered as a list repr. Kept as-is to preserve
            # existing output; confirm whether a plain string was intended.
            item['title'] = '{!s}: {!s}'.format(title, episode.xpath('div[@class="info"]/strong/a/text()').extract())
            item['imdb_id'] = episode.xpath('div[@class="image"]/a/div/@data-const').extract()
            item['airdate'] = [x.strip() for x in episode.xpath('div/div[@class="airdate"]/text()').extract()]
            yield item

When I run this spider under scrapyd I get no results, although it does produce results when I run it with plain scrapy. I think the problem is in this line:

 with open('series_episode.csv') as f:

I don't know where to put my CSV file so that scrapyd can find it. Please help!

Thanks

April
  • 1
  • 1

1 Answer

0

One option would be to save it in /tmp and open it by its absolute path:

 with open('/tmp/series_episode.csv') as f:
cipri.l
  • 819
  • 10
  • 22