I have written a Python function to rate websites according to some parameters (a series of words). The function uses Python Mechanize, and it works fine most of the time.
However, for some websites it just hangs until I press Ctrl+C in the terminal. I'm guessing this is some sort of JavaScript-related problem. Is there a way to build a timeout around this call?
This is my function:
def rateSite(site_url, comparisonWords, timeout=5.0):
    """Fetch ``site_url`` with mechanize and rate it against ``comparisonWords``.

    Args:
        site_url: URL of the page to download.
        comparisonWords: whitespace-separated words used for the rating.
        timeout: socket timeout in SECONDS handed to ``Browser.open``.
            The original call passed 5000, i.e. roughly 83 minutes, which
            made the timeout useless in practice.

    Returns:
        The computed rating. The rating math itself is elided in this
        snippet, so the value below is only a placeholder.

    Raises:
        Whatever ``mechanize.Browser.open`` raises on HTTP errors or when
        the socket timeout expires; nothing is swallowed here.
    """
    # open the site
    localBrowser = mechanize.Browser()
    localBrowser.addheaders = [('User-agent', 'Mozilla/5.1 (X11; U; Linux i686; en-US; rv:1.9.0.1) Gecko/2008071615 Fedora/3.0.1-1.fc9 Firefox/9.0.1')]
    localBrowser.set_handle_robots(False)
    # The hang shown in the traceback ends in ``self._sleep(pause)`` inside
    # ``http_response``: the browser is honouring a Refresh delay requested
    # by the server. That wait is a plain sleep, not socket I/O, so the
    # socket timeout never interrupts it. Only follow refreshes that ask for
    # a pause of at most one second and ignore longer ones.
    localBrowser.set_handle_refresh(True, max_time=1)
    site = localBrowser.open(site_url, timeout=timeout)
    html = site.read()

    # rate the site
    # NOTE(review): placeholder start value -- the real rating computation
    # is not shown in the original snippet; replace as appropriate.
    rating = 0
    for i in comparisonWords.split():
        # do some rating math
        pass
    return rating
And this is the traceback I get on Ctrl+C:
site=localBrowser.open(site_url,timeout=5000)
File "/usr/lib/python2.7/dist-packages/mechanize/_mechanize.py", line 209, in open
return self._mech_open(url, data, timeout=timeout)
File "/usr/lib/python2.7/dist-packages/mechanize/_mechanize.py", line 236, in _mech_open
response = UserAgentBase.open(self, request, data)
File "/usr/lib/python2.7/dist-packages/mechanize/_opener.py", line 202, in open
response = meth(req, response)
File "/usr/lib/python2.7/dist-packages/mechanize/_http.py", line 612, in http_response
"http", request, response, code, msg, hdrs)
File "/usr/lib/python2.7/dist-packages/mechanize/_opener.py", line 219, in error
result = apply(self._call_chain, args)
File "/usr/lib/python2.7/urllib2.py", line 372, in _call_chain
result = func(*args)
File "/usr/lib/python2.7/dist-packages/mechanize/_http.py", line 146, in http_error_302
return self.parent.open(new)
File "/usr/lib/python2.7/dist-packages/mechanize/_mechanize.py", line 209, in open
return self._mech_open(url, data, timeout=timeout)
File "/usr/lib/python2.7/dist-packages/mechanize/_mechanize.py", line 236, in _mech_open
response = UserAgentBase.open(self, request, data)
File "/usr/lib/python2.7/dist-packages/mechanize/_opener.py", line 202, in open
response = meth(req, response)
File "/usr/lib/python2.7/dist-packages/mechanize/_http.py", line 612, in http_response
"http", request, response, code, msg, hdrs)
File "/usr/lib/python2.7/dist-packages/mechanize/_opener.py", line 219, in error
result = apply(self._call_chain, args)
File "/usr/lib/python2.7/urllib2.py", line 372, in _call_chain
result = func(*args)
File "/usr/lib/python2.7/dist-packages/mechanize/_http.py", line 146, in http_error_302
return self.parent.open(new)
File "/usr/lib/python2.7/dist-packages/mechanize/_mechanize.py", line 209, in open
return self._mech_open(url, data, timeout=timeout)
File "/usr/lib/python2.7/dist-packages/mechanize/_mechanize.py", line 236, in _mech_open
response = UserAgentBase.open(self, request, data)
File "/usr/lib/python2.7/dist-packages/mechanize/_opener.py", line 202, in open
response = meth(req, response)
File "/usr/lib/python2.7/dist-packages/mechanize/_http.py", line 612, in http_response
"http", request, response, code, msg, hdrs)
File "/usr/lib/python2.7/dist-packages/mechanize/_opener.py", line 219, in error
result = apply(self._call_chain, args)
File "/usr/lib/python2.7/urllib2.py", line 372, in _call_chain
result = func(*args)
File "/usr/lib/python2.7/dist-packages/mechanize/_http.py", line 146, in http_error_302
return self.parent.open(new)
File "/usr/lib/python2.7/dist-packages/mechanize/_mechanize.py", line 209, in open
return self._mech_open(url, data, timeout=timeout)
File "/usr/lib/python2.7/dist-packages/mechanize/_mechanize.py", line 236, in _mech_open
response = UserAgentBase.open(self, request, data)
File "/usr/lib/python2.7/dist-packages/mechanize/_opener.py", line 202, in open
response = meth(req, response)
File "/usr/lib/python2.7/dist-packages/mechanize/_http.py", line 578, in http_response
self._sleep(pause)
KeyboardInterrupt
Any help on how to solve this or build a time-out for it will be greatly appreciated.
Thanks!