my scrapy not saving data into database. please suggest. it is scraping data,, but not adding those data into the database.. please look into the codes and sggest something,..
My spider.py file
from scrapy.spider import BaseSpider
from scrapy.selector import HtmlXPathSelector
from project2spider.items import Project2Item
from scrapy.http import Request
class ProjectSpider(BaseSpider):
name = "project2spider"
allowed_domains = ["http://directory.thesun.co.uk"]
current_page_no = 1
start_urls = [
"http://directory.thesun.co.uk/find/uk/computer-repair"
]
def get_next_url(self, fired_url):
if '/page/' in fired_url:
url, page_no = fired_url.rsplit('/page/', 1)
else:
if self.current_page_no != 1:
#end of scroll
return
self.current_page_no += 1
return "http://directory.thesun.co.uk/find/uk/computer-repair/page/%s" % self.current_page_no
def parse(self, response):
fired_url = response.url
hxs = HtmlXPathSelector(response)
sites = hxs.select('//div[@class="abTbl "]')
for site in sites:
item = Project2Item()
item['Catogory'] = site.select('span[@class="icListBusType"]/text()').extract()
item['Bussiness_name'] = site.select('a/@title').extract()
item['Description'] = site.select('span[last()]/text()').extract()
item['Number'] = site.select('span[@class="searchInfoLabel"]/span/@id').extract()
item['Web_url'] = site.select('span[@class="searchInfoLabel"]/a/@href').extract()
item['adress_name'] = site.select('span[@class="searchInfoLabel"]/span/text()').extract()
item['Photo_name'] = site.select('img/@alt').extract()
item['Photo_path'] = site.select('img/@src').extract()
yield item
next_url = self.get_next_url(fired_url)
if next_url:
yield Request(next_url, self.parse, dont_filter=True)
`
And my pipelines.py file::
from scrapy import log
from twisted.enterprise import adbapi
import MySQLdb.cursors
# the required Pipeline settings.
class MySQLStorePipeline(object):
def __init__(self):
# db settings
self.dbpool = adbapi.ConnectionPool('MySQLdb',
db='project2',
user='root',
passwd='',
host='127.0.0.1',
port='3306',
cursorclass=MySQLdb.cursors.DictCursor,
charset='utf8',
use_unicode=True
)
def process_item(self, item, spider):
# run db query in thread pool
query = self.dbpool.runInteraction(self._conditional_insert, item)
query.addErrback(self.handle_error)
return item
def _conditional_insert(self, tx, item):
#runs the condition
insert_id = tx.execute(\
"insert into crawlerapp_directory (Catogory, Bussiness_name, Description, Number, Web_url) "
"values (%s, %s, %s, %s, %s)",
(item['Catogory'][0],
item['Bussiness_name'][0],
item['Description'][0],
item['Number'][0],
item['Web_url'][0],
)
)
#connection to the foreign key Adress.
tx.execute(\
"insert into crawlerapp_adress (directory_id, adress_name) "
"values (%s, %s)",
(insert_id,
item['adress_name'][0]
)
)
#connection to the foreign key Photos.
tx.execute(\
"insert into crawlerapp_photos (directory_id, Photo_path, Photo_name) "
"values (%s, %s, %s)",
(insert_id,
item['Photo_path'][0],
item['Photo_name'][0]
)
)
log.msg("Item stored in db: %s" % item, level=log.DEBUG)
def handle_error(self, e):
log.err(e)
I am not being able to save data into my database please help