1

I have the following code which should download images into designated directories from given urls in a csv file. The directories are all setup.

with open('images.csv') as csv_file:
    csv_reader = csv.reader(csv_file, delimiter=',')
    next(csv_reader)
    for row in csv_reader:
        basename = os.path.basename(urlparse(row[0]).path)
        filename = '{}/{}/{}'.format(row[2], row[1], basename)
        urllib.request.urlretrieve(row[0], filename)

The csv file is organized in the following way:

http://farm2.static.flickr.com/1245/1259825348_6a2aa94e8d.jpg,cat,train
http://farm1.static.flickr.com/146/350588612_d84d71cc59.jpg,cat,test
http://farm1.static.flickr.com/32/99029168_940da3a1e5.jpg,cat,val

But when I execute the code I get the following error. I only just figured out today how to even download images from urls with python, so I would be really grateful for all help on this matter!:

---------------------------------------------------------------------------
HTTPError                                 Traceback (most recent call last)
<ipython-input-36-6e201d3625d3> in <module>
      5         basename = os.path.basename(urlparse(row[0]).path)
      6         filename = '{}/{}/{}'.format(row[2], row[1], basename)
----> 7         urllib.request.urlretrieve(row[0], filename)

~\Anaconda3\lib\urllib\request.py in urlretrieve(url, filename, reporthook, data)
    245     url_type, path = splittype(url)
    246 
--> 247     with contextlib.closing(urlopen(url, data)) as fp:
    248         headers = fp.info()
    249 

~\Anaconda3\lib\urllib\request.py in urlopen(url, data, timeout, cafile, capath, cadefault, context)
    220     else:
    221         opener = _opener
--> 222     return opener.open(url, data, timeout)
    223 
    224 def install_opener(opener):

~\Anaconda3\lib\urllib\request.py in open(self, fullurl, data, timeout)
    529         for processor in self.process_response.get(protocol, []):
    530             meth = getattr(processor, meth_name)
--> 531             response = meth(req, response)
    532 
    533         return response

~\Anaconda3\lib\urllib\request.py in http_response(self, request, response)
    639         if not (200 <= code < 300):
    640             response = self.parent.error(
--> 641                 'http', request, response, code, msg, hdrs)
    642 
    643         return response

~\Anaconda3\lib\urllib\request.py in error(self, proto, *args)
    561             http_err = 0
    562         args = (dict, proto, meth_name) + args
--> 563         result = self._call_chain(*args)
    564         if result:
    565             return result

~\Anaconda3\lib\urllib\request.py in _call_chain(self, chain, kind, meth_name, *args)
    501         for handler in handlers:
    502             func = getattr(handler, meth_name)
--> 503             result = func(*args)
    504             if result is not None:
    505                 return result

~\Anaconda3\lib\urllib\request.py in http_error_302(self, req, fp, code, msg, headers)
    753         fp.close()
    754 
--> 755         return self.parent.open(new, timeout=req.timeout)
    756 
    757     http_error_301 = http_error_303 = http_error_307 = http_error_302

~\Anaconda3\lib\urllib\request.py in open(self, fullurl, data, timeout)
    529         for processor in self.process_response.get(protocol, []):
    530             meth = getattr(processor, meth_name)
--> 531             response = meth(req, response)
    532 
    533         return response

~\Anaconda3\lib\urllib\request.py in http_response(self, request, response)
    639         if not (200 <= code < 300):
    640             response = self.parent.error(
--> 641                 'http', request, response, code, msg, hdrs)
    642 
    643         return response

~\Anaconda3\lib\urllib\request.py in error(self, proto, *args)
    567         if http_err:
    568             args = (dict, 'default', 'http_error_default') + orig_args
--> 569             return self._call_chain(*args)
    570 
    571 # XXX probably also want an abstract factory that knows when it makes

~\Anaconda3\lib\urllib\request.py in _call_chain(self, chain, kind, meth_name, *args)
    501         for handler in handlers:
    502             func = getattr(handler, meth_name)
--> 503             result = func(*args)
    504             if result is not None:
    505                 return result

~\Anaconda3\lib\urllib\request.py in http_error_default(self, req, fp, code, msg, hdrs)
    647 class HTTPDefaultErrorHandler(BaseHandler):
    648     def http_error_default(self, req, fp, code, msg, hdrs):
--> 649         raise HTTPError(req.full_url, code, msg, hdrs, fp)
    650 
    651 class HTTPRedirectHandler(BaseHandler):

HTTPError: HTTP Error 404: Not Found
  • I copy-pasted what you had and made the directories and was able to download all the cat pictures. The only difference, as far I can tell, is that I'm not using Anaconda; I'm using venv from python3 so I had to replace `urlparse ` with `urllib.parse.urlparse` – afghanimah Apr 24 '20 at 21:04
  • The total amount of urls are about 1400, when I run the code it manages to download the first 15 until this error pops up. – Enrique Lopez Apr 25 '20 at 07:23
  • In that case, since it's a 404 error, my guess is that the URL for that picture is the problem. Try going to the problem URL in your browser manually and see if you can even get to it. – afghanimah Apr 25 '20 at 19:47
  • Thanks for the help! There were a few images in the csv file with HTTPError 404 and 410. – Enrique Lopez May 20 '20 at 15:01

0 Answers0