I am facing an error with this code. Can anyone help me with it so I can automate the process of downloading all the images in the CSV file that contain all the URLs of the images?
The error I am getting is:
URLError Traceback (most recent call last)
<ipython-input-320-dcd87f841181> in <module>
19 urlShort = re.search(filejpg, str(r)).group()
20 print(urlShort)
---> 21 download(x, f'{di}/{urlShort}')
22 print(type(x))
URLError: <urlopen error unknown url type: {'https>
This is the code I am using:
from pathlib import Path
from shutil import rmtree as delete
from urllib.request import urlretrieve as download
from gazpacho import get, Soup
import re
import pandas as pd
import numpy as np
#import data
df = pd.read_csv('urlReady1.csv')
df.shape
#locate folder
di = 'Dubai'
Path(di).mkdir(exist_ok=True)
#change data to dict
dict_copy = df.to_dict('records')
#iterate over every row of the data and download the jpg file
for r in dict_copy:
if r == 'urlready':
print("header")
else:
x = str(r)
filejpg = "[\d]{1,}\.jpg"
urlShort = re.search(filejpg, str(r)).group()
print(urlShort)
download(x, f'{di}/{urlShort}')
print(type(x))