Hi I using scrapy for scrape paginasamarillas.es but I don't get results these are my codes.Please can you help me with this?
from scrapy.item import Item, Field
class AyellItem(Item):
name = Field()
pass
This is the spider
from scrapy.selector import HtmlXPathSelector
from scrapy.contrib.linkextractors.sgml import SgmlLinkExtractor
from scrapy.contrib.spiders import CrawlSpider, Rule
from ayell.items import AyellItem
class YellSpider(CrawlSpider):
name = 'yell'
allowed_domains = ['http://www.paginasamarillas.es']
start_urls = ['http://www.paginasamarillas.es/alimentacion/all-ma/all-pr/all-is/all-ci/all-ba/all-pu/all-nc/1']
def parse_items(self, response):
hxs = HtmlXPathSelector(response)
directors = hxs.select("/html/body")
items = []
for directors in directors:
item = AyellItem()
item ["name"] = directors.select("/h1").extract()
items.append(item)
return items
and this what i get
2015-07-31 19:11:25-0300 [yell] DEBUG: Crawled (200) http://www.paginasamarillas.es/alimentacion/all-ma/all-pr/all-is/all-ci/all-ba/all-pu/all-nc/1> (referer: None) 2015-07-31 19:11:25-0300 [yell] INFO: Closing spider (finished) 2015-07-31 19:11:25-0300 [yell] INFO: Dumping spider stats: {'downloader/request_bytes': 267, 'downloader/request_count': 1,
'downloader/request_method_count/GET': 1,
'downloader/response_bytes': 30509, 'downloader/response_count': 1, 'downloader/response_status_count/200': 1, 'finish_reason': 'finished', 'finish_time': datetime.datetime(2015, 7, 31, 22, 11, 25, 731485), 'scheduler/memory_enqueued': 1,