Opening a file in write mode and immediately closing it truncates it, i.e. it removes the file's contents.
class RestaurantDetailSpider(scrapy.Spider):
    """Scrape restaurant detail pages from the URLs listed in restaurantsLink.csv.

    The URL list is produced by RestaurantMainSpider (see the crawl command
    below); this spider reads it at class-definition time to build start_urls.
    """

    name = "RestaurantDetailSpider"

    # BUG FIX: the original code opened restaurantsLink.csv in 'w' mode and
    # closed it before reading, which truncated the file and left `urls`
    # empty every run. Read it with a context manager instead so the handle
    # is always closed, and skip the CSV header row.
    try:
        with open('./restaurantsLink.csv') as link_file:
            urls = list(link_file)[1:]
    except IOError:
        # File not generated yet: start with no URLs instead of crashing
        # at import time.
        urls = []
    print("Url List Found : " + str(len(urls)))

    start_urls = urls

    def safeStr(self, obj):
        """Return str(obj), tolerating non-ASCII text under Python 2.

        None is passed through unchanged so callers can test `is not None`.
        On UnicodeEncodeError (Py2 str() on non-ASCII unicode), unencodable
        characters are dropped.
        """
        try:
            if obj is None:
                return obj
            return str(obj)
        except UnicodeEncodeError:
            return obj.encode('utf8', 'ignore').decode('utf8')

    def parse(self, response):
        """Yield one RestaurantDetailItem extracted from a detail page."""
        try:
            detail = RestaurantDetailItem()

            heading = self.safeStr(response.css('#HEADING::text').extract_first())
            if heading is not None:
                if ',' in heading:
                    # Quote values containing commas so the CSV column
                    # boundaries stay intact.
                    heading = "'" + heading + "'"
                detail['Name'] = heading

            contact = self.safeStr(response.css('.directContactInfo *::text').extract_first())
            if contact is not None:
                if ',' in contact:
                    contact = "'" + contact + "'"
                detail['Phone'] = contact

            # extract() always returns a list (possibly empty), never None,
            # so test truthiness rather than `is not None`.
            address_parts = response.css('.headerBL .address *::text').extract()
            if address_parts:
                address = ', '.join([self.safeStr(x) for x in address_parts])
                # Strip the commas again so the value is CSV-safe.
                detail['Address'] = address.replace(',', '')

            email = self.safeStr(response.css('#RESTAURANT_DETAILS .detailsContent a::attr(href)').extract_first())
            if email is not None:
                detail['Email'] = email.replace('mailto:', '')

            type_parts = response.css('.rating_and_popularity .header_links *::text').extract()
            if type_parts:
                type_text = ', '.join([self.safeStr(x) for x in type_parts])
                detail['Type'] = type_text.replace(',', '')

            yield detail
        except Exception as e:
            # Report the actual failure instead of swallowing it silently.
            print("Error occurred: " + str(e))
            yield None
scrapy crawl RestaurantMainSpider -t csv -o restaurantsLink.csv
This will create the restaurantsLink.csv file, which is then used by the next spider, RestaurantDetailSpider.
So you can run the following command — it deletes restaurantsLink.csv and generates a fresh one for the spider above, so the file is overwritten on every run:
rm restaurantsLink.csv && scrapy crawl RestaurantMainSpider -o restaurantsLink.csv -t csv