I want to stop the spider as soon as a certain condition is met.
I tried doing this:
raise CloseSpider('Some Text')
and
sys.exit("SHUT DOWN EVERYTHING!")
But the spider does not stop. Here is the code. Raising the exception instead of using `return` does not work either; the spider continues to crawl:
import scrapy
from scrapy.http import Request
from tutorial.items import DmozItem
from scrapy.exceptions import CloseSpider
import sys
class DmozSpider(scrapy.Spider):
    """Crawl the Jabong women's sandals listing and emit one item per product.

    Starting from page 1 of the listing, ``parse`` schedules a request for
    every product link found on the page and then requests the next listing
    page. Pagination ends on the first listing page that contains no product
    links. ``parse_page2`` fills in and emits the item for each product page.
    """

    name = "tutorial"
    # Scrapy reads the plural ``allowed_domains``; the singular spelling is
    # just an unused class attribute, so no domain restriction was in effect.
    allowed_domains = ["jabong.com"]
    start_urls = [
        "http://www.jabong.com/women/shoes/sandals/?page=1"
    ]

    # Number of the listing page most recently requested.
    page_index = 1

    def parse(self, response):
        """Schedule product-page requests, then request the next listing page.

        Args:
            response: The downloaded listing page.

        Yields:
            One ``Request`` per product link (handled by ``parse_page2``),
            followed by a single ``Request`` for the next listing page
            (handled by this method again).
        """
        # Select only <li> elements that carry a ``data-url`` attribute.
        # A bare ``//li`` matches every list item on the page, so it can stay
        # non-empty on a page that lists no products and the crawl would
        # never stop. NOTE(review): presumably that is why the original
        # ``else: return`` never fired - confirm against the live markup.
        products = response.xpath('//li[@data-url]')
        if not products:
            # No product links on this page: stop paginating. A plain return
            # is enough here; requests that were already yielded are left
            # untouched, and no further listing page is requested.
            return

        for product in products:
            data_urls = product.xpath('@data-url').extract()
            if not data_urls:
                continue
            item = DmozItem()
            yield Request(
                url="http://www.jabong.com/" + data_urls[0],
                callback=self.parse_page2,
                # Hand the (still empty) item to the product-page callback.
                meta={"item": item},
                headers={"Accept":
                         "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8"},
            )

        # Advance to the next listing page.
        self.page_index += 1
        yield Request(
            url="http://www.jabong.com/women/shoes/sandals/?page=%s" % (self.page_index),
            headers={"Referer": "http://www.jabong.com/women/shoes/sandals/",
                     "X-Requested-With": "XMLHttpRequest"},
            callback=self.parse,
        )

    def parse_page2(self, response):
        """Fill in the item carried in ``response.meta`` and emit it.

        Args:
            response: The downloaded product page; ``response.meta['item']``
                holds the item created in ``parse``.

        Yields:
            The item with ``site_name`` and ``tags`` set.
        """
        item = response.meta['item']
        item['site_name'] = 'jabong'
        item['tags'] = ''
        yield item
Update: Even if I raise CloseSpider instead of using return, my spider does not stop.