**I tried to run this Scrapy spider to download all the related PDFs from a given URL**
I tried to execute this spider with `scrapy crawl mySpider`:
import scrapy
from scrapy.http import Request


class pwc_tax(scrapy.Spider):
    name = "sec_gov"
    allowed_domains = ["www.sec.gov"]
    start_urls = ["https://secsearch.sec.gov/search?utf8=%3F&affiliate=secsearch&query=exhibit+10"]

    def parse(self, response):
        # Follow each search-result link to its detail page
        for href in response.css('div#all_results h3 a::attr(href)').extract():
            yield Request(
                url=response.urljoin(href),
                callback=self.parse_article
            )

    def parse_article(self, response):
        # Queue a request for every PDF link found on the detail page
        for href in response.css('div.download_wrapper a[href$=".pdf"]::attr(href)').extract():
            yield Request(
                url=response.urljoin(href),
                callback=self.save_pdf
            )

    def save_pdf(self, response):
        # Name the file after the last URL segment and write the raw response body
        path = response.url.split('/')[-1]
        self.logger.info('Saving PDF %s', path)
        with open(path, 'wb') as f:
            f.write(response.body)
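
In case a built-in alternative is useful for comparison, here is a minimal sketch of the same crawl using Scrapy's FilesPipeline to handle the PDF downloads instead of a custom save_pdf callback. The spider name, the FILES_STORE directory, and the extra allowed domain are assumptions added for illustration, and the CSS selectors are copied unchanged from the code above, so they may still need adjusting to the actual page structure.

import scrapy


class SecPdfSpider(scrapy.Spider):
    # Hypothetical variant of the spider above that delegates the actual
    # file download and naming to Scrapy's built-in FilesPipeline.
    # Assumed project settings (settings.py):
    #   ITEM_PIPELINES = {"scrapy.pipelines.files.FilesPipeline": 1}
    #   FILES_STORE = "pdfs"  # directory where downloaded files are stored
    name = "sec_gov_files"  # assumed name for this sketch
    allowed_domains = ["www.sec.gov", "secsearch.sec.gov"]
    start_urls = [
        "https://secsearch.sec.gov/search?utf8=%3F&affiliate=secsearch&query=exhibit+10"
    ]

    def parse(self, response):
        # Same result-page selector as the original spider
        for href in response.css('div#all_results h3 a::attr(href)').extract():
            yield response.follow(href, callback=self.parse_article)

    def parse_article(self, response):
        # Hand every PDF URL to FilesPipeline via the "file_urls" field
        pdf_urls = [
            response.urljoin(href)
            for href in response.css('div.download_wrapper a[href$=".pdf"]::attr(href)').extract()
        ]
        if pdf_urls:
            yield {"file_urls": pdf_urls}

With this layout the crawl would be started with `scrapy crawl sec_gov_files`; in general the argument to `scrapy crawl` has to match the spider's `name` attribute, so the original spider above would be run as `scrapy crawl sec_gov`.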
Can anyone help me with this? Thanks in advance.