When I run the code locally (Windows 10), everything works fine. I have checked other answers here and other resources, but could not find a solution. After deploying to ScrapingHub, I'm getting this error message:
[scrapy.core.scraper] Spider error processing <POST http://oris.co.palm-beach.fl.us/or_web1/new_sch.asp> (referer: http://oris.co.palm-beach.fl.us/or_web1/) Less
Traceback (most recent call last):
File "/usr/local/lib/python3.6/site-packages/twisted/internet/defer.py", line 1299, in _inlineCallbacks
result = g.send(result)
File "/usr/local/lib/python3.6/site-packages/scrapy/core/downloader/middleware.py", line 43, in process_request
defer.returnValue((yield download_func(request=request,spider=spider)))
File "/usr/local/lib/python3.6/site-packages/twisted/internet/defer.py", line 1276, in returnValue
raise _DefGen_Return(val)
twisted.internet.defer._DefGen_Return: <200 http://oris.co.palm-beach.fl.us/or_web1/new_sch.asp>
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "/usr/local/lib/python3.6/site-packages/scrapy/core/spidermw.py", line 42, in process_spider_input
result = method(response=response, spider=spider)
File "/usr/local/lib/python3.6/site-packages/scrapy_pagestorage.py", line 68, in process_spider_input
self.save_response(response, spider)
File "/usr/local/lib/python3.6/site-packages/scrapy_pagestorage.py", line 102, in save_response
self._writer.write(payload)
File "/usr/local/lib/python3.6/site-packages/scrapinghub/hubstorage/batchuploader.py", line 224, in write
data = jsonencode(item)
File "/usr/local/lib/python3.6/site-packages/scrapinghub/hubstorage/serialization.py", line 38, in jsonencode
return dumps(o, default=jsondefault)
File "/usr/local/lib/python3.6/json/__init__.py", line 238, in dumps
**kw).encode(obj)
File "/usr/local/lib/python3.6/json/encoder.py", line 199, in encode
chunks = self.iterencode(o, _one_shot=True)
File "/usr/local/lib/python3.6/json/encoder.py", line 257, in iterencode
return _iterencode(o, 0)
TypeError: keys must be a string
Here is a snippet of my Scrapy spider that throws this error.
`ToDate` and `FromDate` are passed as arguments to the spider:
# Landing page of the records search; Scrapy requests it first and, by
# default, hands the response to parse().
start_urls = [
    "http://oris.co.palm-beach.fl.us/or_web1/",
]
def parse(self, response):
    """Submit the document search form as a URL-encoded POST.

    Searches ``new_sch.asp`` by ``DocType`` ``LP`` for the range given by
    ``self.FromDate`` .. ``self.ToDate`` and schedules the request with
    ``self.parsed`` as its callback. ``response`` itself is not inspected.
    """
    search_url = 'http://oris.co.palm-beach.fl.us/or_web1/new_sch.asp'

    # Browser-like headers sent along with the form submission.
    request_headers = {
        'upgrade-insecure-requests': "1",
        'user-agent': "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/73.0.3683.86 Safari/537.36",
        'origin': "http://oris.co.palm-beach.fl.us",
        'content-type': "application/x-www-form-urlencoded",
        'dnt': "1",
        'accept': "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3",
        'cache-control': "no-cache",
    }

    # Date range should be within 90 days.
    # Kept as ordered pairs so the encoded body lists fields in this order.
    form_fields = [
        ('FromDate', self.FromDate),
        ('PageSize', '500'),
        ('RecSetSize', '500'),
        ('ToDate', self.ToDate),
        ('consideration', ''),
        ('search_by', 'DocType'),
        ('search_entry', 'LP'),
    ]
    encoded_form = urlencode(form_fields)

    search_request = scrapy.Request(
        search_url,
        method="POST",
        headers=request_headers,
        body=encoded_form,
        callback=self.parsed,
    )
    yield search_request
def parsed(self, response):
    """Follow each "View" link of the search response to its detail page.

    For every ``href`` of an ``<a class="list_2">`` element: delete embedded
    CR/TAB/LF characters, resolve the link against the response URL, swap
    ``details.asp`` for ``details_des.asp``, append ``&linked=&party_seq=``
    and yield a request whose callback is ``self.details``.
    """
    # Deletion table built once: removes \r, \t and \n in a single pass
    # instead of three chained replace() calls per link.
    strip_controls = str.maketrans('', '', '\r\t\n')

    for href in response.xpath("//a[@class = 'list_2']/@href").extract():
        cleaned = href.translate(strip_controls)
        absolute = response.urljoin(cleaned)
        detail_url = (
            absolute.replace('details.asp', 'details_des.asp')
            + '&linked=&party_seq='
        )
        yield scrapy.Request(detail_url, callback=self.details)