I found similar questions that have been asked already, but none of the answers explained my situation. And I also need help implementing the second part of my code.
Here is the code:
    import scrapy
    import json

    class MySpider(scrapy.Spider):
        name = "myspider"

        def start_requests(self):
            urls = [
                'https://www.link1.com',
                'https://www.link2.com'
            ]
            for url in urls:
                yield scrapy.Request(url=url, callback=self.parse_result, meta={'url': url})

        def parse_result(self, response):
            url = response.meta['url']
            tag = response.xpath('//xxxxxxx/text()').get()
            if tag is not None:
                jsonData = json.loads(tag)
                items_list = jsonData['x']['y']['z']
                for index, item in enumerate(items_list):
                    self.parse_item(item, index + 1, url)
                    ############### If I yield here I get some output ###############
                    # yield {
                    #     'url': url,
                    #     'index': index,
                    #     'title': item.get('name'),
                    #     'link': item.get('itemUrl'),
                    #     'item_id': item.get('itemId'),
                    #     'rating': item['rating'].get('averageRating'),
                    #     'price': item.get('price')
                    # }

            # Request next page
            next_page = response.css('li.next a::attr(href)').get()
            if next_page is not None:
                yield response.follow(next_page, callback=self.parse_result, meta={'url': url})
                # yield scrapy.Request(url=url + next_page, callback=self.parse_result, meta={'url': url})

        def parse_item(self, item_data, index, url):
            ############# This is not getting called #############
            yield {
                'url': url,
                'index': index,
                'title': item_data.get('name'),
                'link': item_data.get('itemUrl'),
                'item_id': item_data.get('itemId'),
                'rating': item_data['rating'].get('averageRating'),
                'price': item_data.get('price')
            }
1. When I yield directly from the parse_result function, I get at least some output, but when I route the items through the parse_item function, nothing shows up at all (my guess at a fix is sketched below).

2. Also, how do I collect all the items from one URL into a single list before sending them to my front end (my rough idea is sketched below as well)?

Thanks for the help.
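For question 1, my suspicion is that parse_item is a generator, so calling it without iterating it never runs its body. A minimal, untested sketch of what I think the loop in parse_result would need to look like, using yield from:

    for index, item in enumerate(items_list):
        # Delegate to the parse_item generator so its items are actually
        # yielded back to Scrapy instead of being silently discarded
        yield from self.parse_item(item, index + 1, url)

Is that the right way to delegate to a helper generator in Scrapy?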
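For question 2, one way I can imagine grouping everything is to carry a growing list through meta across the paginated requests and only yield it once the last page is reached. A rough sketch (the 'collected' meta key and the single combined record are my own invention, untested):

    def parse_result(self, response):
        url = response.meta['url']
        # Hypothetical accumulator: partial results carried over from earlier pages
        collected = response.meta.get('collected', [])

        tag = response.xpath('//xxxxxxx/text()').get()
        if tag is not None:
            jsonData = json.loads(tag)
            for index, item in enumerate(jsonData['x']['y']['z']):
                collected.append({
                    'url': url,
                    'index': index + 1,
                    'title': item.get('name'),
                    'link': item.get('itemUrl'),
                    'item_id': item.get('itemId'),
                    'rating': item['rating'].get('averageRating'),
                    'price': item.get('price'),
                })

        next_page = response.css('li.next a::attr(href)').get()
        if next_page is not None:
            # Not done yet: forward the partial list to the next page's request
            yield response.follow(next_page, callback=self.parse_result,
                                  meta={'url': url, 'collected': collected})
        else:
            # Last page for this url: emit one record holding all of its items
            yield {'url': url, 'items': collected}

Would that be a reasonable approach, or is there a more idiomatic Scrapy way to do it?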