This is the entire code ,this scrapes product details lke name price and link from an e-commerce website and displays the output and gives a csv file as well as displays graph.
from bs4 import BeautifulSoup
import requests
import re
def get_data():
global items
while True:
search = input("What product do you want to search for?(Type '0' to quit) \n")
if search == '0':
break
else:
url = f"https://www.newegg.ca/p/pl?d={search}&N=4131"
page = requests.get(url).text
doc = BeautifulSoup(page, "html.parser")
page_text = doc.find(class_="list-tool-pagination-text").strong
pages = int(str(page_text).split("/")[-2].split(">")[-1][:-1])
items_found = {}
for page in range(1, pages + 1):
url = f"https://www.newegg.ca/p/pl?d={search}&N=4131&page={page}"
page = requests.get(url).text
doc = BeautifulSoup(page, "html.parser")
try:
div = doc.find(class_="item-cells-wrap border-cells items-grid-view four-cells expulsion-one-cell")
items = div.find_all(text=re.compile(search))
except Exception:
print('Product not found')
for item in items:
parent = item.parent
if parent.name != "a":
continue
link = parent['href']
next_parent = item.find_parent(class_="item-container")
try:
price = next_parent.find(class_="price-current").find("strong").string
items_found[item] = {"price": int(price.replace(",", "")), "link": link}
except:
pass
sorted_items = sorted(items_found.items(), key=lambda x: x[1]['price'])
data = []
for item in sorted_items:
print(item[0])
print(f"${item[1]['price']}")
print(item[1]['link'])
print("-------------------------------")
data.append([item[0], item[1]['price'], item[1]['link']])
if __name__ == '__main__':
get_data()
This code is working on my local machine but not in google colab. Can anyone please give me a solution
The error I get:
<ipython-input-1-4fc0ee293c1c> in <module>()
90
91 if __name__ == '__main__':
---> 92 get_data()
<ipython-input-1-4fc0ee293c1c> in get_data()
25
26 # finding the product page
---> 27 page_text = doc.find(class_="list-tool-pagination-text").strong
28 # finding the number of pages of the products
29 pages = int(str(page_text).split("/")[-2].split(">")[-1][:-1])
AttributeError: 'NoneType' object has no attribute 'strong'```