So I want to create web scraper that will work from inputting URL, classes, elements and headers from console. I want to put e
and f
variable in d = list.find(g, class_=h).text.replace('\n', '')
I came up with this code, but it gives me error: AttributeError: 'NoneType' object has no attribute 'text'
. Seems like I have something incorrect in here: d = list.find(g, class_=h).text.replace('\n', '')
from pydoc import classname
from bs4 import BeautifulSoup
import requests
from csv import writer
header = []
element = []
classname = []
url = input("URL: ")
page = requests.get(str(url))
soup = BeautifulSoup(page.content, 'html.parser')
x = input("Enter element of first class (e.g div): ")
y = input("Enter class name for that element: ")
lists = soup.find_all(str(x), class_=str(y))
z = input("Enter file name you want to write data in (must end with .csv): ")
a =" "
while a!= "":
a = input("Enter headers: ")
if a!= "":
header.append(a)
print(header)
with open(str(z), 'w', encoding='utf8', newline='') as f:
thewriter = writer(f)
thewriter.writerow(header)
c = len(header)
for i in range(c):
e = input("Enter element of first class (e.g div): ")
element.append(e)
f = input("Enter class name for that element: ")
classname.append(f)
for list in lists:
for i in range(c):
d = header[i]
g = e[i]
h = f[i]
d = list.find(g, class_=h).text.replace('\n', '')
info = d
thewriter.writerow(info)