Requirements: Python 2.7 and no external libraries like Requests or BeautifulSoup :(
I get the error in the traceback from retrieveUrl when I call this url:
u'http://%E7%9F%A5%E3%81%A3%E5%BE%97%E8%A2%8B.biz/wp-content/uploads/2016/10/104743-300x225.jpg'
As you can see, my server already gives me that url nice and uriencoded-ready, but it still blows up.
def retrieveUrl(url):
req = urllib2.Request(url, None, {'User-Agent': 'Mozilla/5.0 (compatible; Anki)'})
filecontents = urllib2.urlopen(req).read()
path = unicode(urllib2.unquote(url.encode("utf8")), "utf8")
filename, file_extension = os.path.splitext(path)
return filename, file_extension, filecontents
Error Traceback
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
File "C:\Python27\lib\urllib2.py", line 154, in urlopen
return opener.open(url, data, timeout)
File "C:\Python27\lib\urllib2.py", line 431, in open
response = self._open(req, data)
File "C:\Python27\lib\urllib2.py", line 449, in _open
'_open', req)
File "C:\Python27\lib\urllib2.py", line 409, in _call_chain
result = func(*args)
File "C:\Python27\lib\urllib2.py", line 1227, in http_open
return self.do_open(httplib.HTTPConnection, req)
File "C:\Python27\lib\urllib2.py", line 1194, in do_open
h.request(req.get_method(), req.get_selector(), req.data, headers)
File "C:\Python27\lib\httplib.py", line 1057, in request
self._send_request(method, url, body, headers)
File "C:\Python27\lib\httplib.py", line 1097, in _send_request
self.endheaders(body)
File "C:\Python27\lib\httplib.py", line 1053, in endheaders
self._send_output(message_body)
File "C:\Python27\lib\httplib.py", line 897, in _send_output
self.send(msg)
File "C:\Python27\lib\httplib.py", line 859, in send
self.connect()
File "C:\Python27\lib\httplib.py", line 836, in connect
self.timeout, self.source_address)
File "C:\Python27\lib\socket.py", line 557, in create_connection
for res in getaddrinfo(host, port, 0, SOCK_STREAM):
File "C:\Python27\lib\encodings\idna.py", line 164, in encode
result.append(ToASCII(label))
File "C:\Python27\lib\encodings\idna.py", line 76, in ToASCII
label = nameprep(label)
File "C:\Python27\lib\encodings\idna.py", line 38, in nameprep
raise UnicodeError("Invalid character %r" % c)
UnicodeError: Invalid character u'\x9f'
I haven't managed to even figure out what character u'\x9f' is. Any ideas how I can fix that function to get the filecontents?