I've already scraped the descriptions of the articles. Now I'm trying to scrape the description of a video from the BBC News website, but it returns an empty string. Any advice?
This is my code:
class BbcNewsSpider(CrawlSpider):
    """Crawl spider that starts at https://www.bbc.com/ and follows two link rules.

    Rule 1 follows the anchor(s) inside the ``orb-nav-home`` list item and
    hands the response to ``parse_home``.
    Rule 2 follows overlay links of non-icon media blocks inside
    ``module__content`` containers and hands the response to ``parse_item``
    (``parse_item`` is not shown in this snippet).
    """

    name = 'BBCNews'
    start_urls = ['https://www.bbc.com/']

    rules = (
        Rule(
            LinkExtractor(
                restrict_xpaths="//li[contains(@class,'orb-nav-home')]//a",
                # Keeps the first 16 characters of the link and appends 'com';
                # presumably this normalises the home link to
                # 'https://www.bbc.com' -- confirm against the real href.
                process_value=lambda x: x[0:16] + 'com',
            ),
            callback='parse_home',
        ),
        Rule(
            LinkExtractor(
                allow='bbc.com',
                # Adjacent string literals are concatenated into ONE XPath.
                # The original split the middle literal across two physical
                # lines, which is a syntax error for a single-quoted string.
                restrict_xpaths=(
                    '//div[contains(@class,"module__content")]'
                    '//div[contains(@class,"media") and '
                    'not(contains(@class,"media--icon"))]'
                    '//a[contains(@class,"block-link__overlay-link")]'
                ),
                # Site-relative links (leading "/") get the BBC origin
                # prepended; anything else is passed through untouched.
                process_value=lambda x: (
                    'https://www.bbc.com' + x if x.startswith("/") else x
                ),
            ),
            callback='parse_item',
        ),
    )
This is the function I'm using:
def parse_home(self, response):
    """Yield one ``BbcmediaItem`` for every ``div.media--video`` block.

    The response body is parsed with ``pq``; each matching block produces an
    item with the keys ``url``, ``title``, ``Type`` and ``description``.
    Nothing is yielded when the response status is not 200.
    """
    if response.status != 200:
        return

    page = pq(response.text)
    for block in page('div.media--video').items():
        entry = BbcmediaItem()
        # The same anchor supplies both the link target and the headline.
        headline_link = block.find('a.media__link')
        entry['url'] = headline_link.attr('href')
        entry['title'] = headline_link.text().strip()
        entry['Type'] = block.find('a.media__tag').text()
        # NOTE(review): the lookup is scoped to this media block; if the
        # block contains no <p class="story-body__introduction"> the result
        # is presumably an empty string. That class name looks like it
        # belongs to the article/video page itself rather than this
        # listing -- confirm against the page markup.
        entry['description'] = block.find('p.story-body__introduction').text().strip()
        yield entry