I am trying to scrape product details such as product name, price, category, and color from https://nike.co.in. Despite giving the script what I believe are the correct XPaths, it does not seem to scrape the details and just gives empty lists. Here is my complete script:
import time
import numpy as np
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from webdriver_manager.chrome import ChromeDriverManager
def scrape_nike(shop_by_category):
    website_address = ['https://nike.co.in']
    options = webdriver.ChromeOptions()
    options.add_argument('start-maximized')
    options.add_argument("window-size=1200x600")
    options.add_experimental_option("excludeSwitches", ["enable-automation"])
    options.add_experimental_option('useAutomationExtension', False)
    browser = webdriver.Chrome(ChromeDriverManager().install(), options=options)
    delays = [7, 4, 6, 2, 10, 19]
    delay = np.random.choice(delays)
    for crawler in website_address:
        browser.get(crawler)
        time.sleep(2)
        time.sleep(delay)
        browser.find_element_by_xpath('//*[@id="VisualSearchInput"]').send_keys(shop_by_category, Keys.ENTER)
        product_price = browser.find_elements_by_xpath('//*[@id="Wall"]/div/div[5]/div/main/section/div/div[1]/div/figure/div/div[3]/div/div/div/div')
        product_price_list = [elem.text for elem in product_price]
        product_category = browser.find_elements_by_xpath('//*[@id="Wall"]/div/div[5]/div/main/section/div/div[1]/div/figure/div/div[1]/div/div[2]')
        product_category_list = [elem.text for elem in product_category]
        product_name = browser.find_elements_by_xpath('//*[@id="Nike Air Zoom Vomero 15"]')
        product_name_list = [elem.text for elem in product_name]
        product_colors = browser.find_elements_by_xpath('//*[@id="Wall"]/div/div[5]/div/main/section/div/div[4]/div/figure/div/div[2]/div/button/div')
        product_colors_list = [elem.text for elem in product_colors]
        print(product_price_list)
        print(product_category_list)
        print(product_name_list)
        print(product_colors_list)


if __name__ == '__main__':
    category_name_list = ['running']
    for category in category_name_list:
        scrape_nike(category)
The output that I want is something like:
[Rs 1000, Rs 2990, Rs 3000,....]
[Mens running shoes, Womens running shoes, ...]
[Nike Air Zoom Pegasus, Nike Quest 3, ...]
[5 colors, 1 colors, 3 colors, ...]
But the output that I am getting right now is:
[]
[]
[]
[]
What exactly is causing these empty lists? I do not understand what I am missing. Please help!
EDIT: I am now able to get a single product's details in my lists, whereas I want all products. Here is the change in my code:
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

wait = WebDriverWait(browser, 20)  # explicit wait used for the locators below
product_price = wait.until(EC.presence_of_all_elements_located((By.XPATH, '//*[@id="Wall"]/div/div[5]/div/main/section/div/div[1]/div/figure/div/div[3]/div/div/div/div')))
product_price_list = [elem.text for elem in product_price]
product_category = wait.until(EC.presence_of_all_elements_located((By.XPATH, '//*[@id="Wall"]/div/div[5]/div/main/section/div/div[1]/div/figure/div/div[1]/div/div[2]')))
product_category_list = [elem.text for elem in product_category]
product_name = wait.until(EC.presence_of_all_elements_located((By.XPATH, '//*[@id="Nike Air Zoom Vomero 15"]')))
product_name_list = [elem.text for elem in product_name]
product_colors = wait.until(EC.presence_of_all_elements_located((By.XPATH, '//*[@id="Wall"]/div/div[5]/div/main/section/div/div[4]/div/figure/div/div[2]/div/button/div')))
product_colors_list = [elem.text for elem in product_colors]
This gives:
['₹13,495']
["Men's Running Shoe"]
['Nike Air Zoom Vomero 15']
['5 Colours']
I want multiple such entries, one per product card on the results page.
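To make the goal concrete, this is roughly the kind of loop I am aiming for, placed where the EDIT code above sits. It is only a sketch: it assumes the product-card__* class names from my BeautifulSoup attempt in EDIT 2 below are the right hooks on the results page, which I have not confirmed.

# Sketch only: one locator per product card instead of one absolute XPath per field.
# The class names below are taken from my BeautifulSoup attempt (EDIT 2) and may not
# match the live page.
cards = wait.until(EC.presence_of_all_elements_located(
    (By.XPATH, '//div[contains(@class, "product-card__body")]')))
products = []
for card in cards:
    products.append({
        'name': card.find_element_by_xpath('.//div[contains(@class, "product-card__title")]').text,
        'price': card.find_element_by_xpath('.//div[contains(@class, "is-current-price")]').text,
        'category': card.find_element_by_xpath('.//div[contains(@class, "product-card__subtitle")]').text,
        'colors': card.find_element_by_xpath('.//div[contains(@class, "product-card__product-count")]').text,
    })
print(products)  # expecting one dict per product card, not a single entry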
EDIT 2: I have also tried using beautifulsoup4, but that also returned empty output.
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from bs4 import BeautifulSoup
import pandas as pd


def adidas(shop_by_category):
    driver = webdriver.Chrome("F:\\chromedriver\\chromedriver.exe")
    titles = []    # List to store name of the product
    prices = []    # List to store price of the product
    category = []  # List to store category of the product
    colors = []    # List to store the no of colors of the product
    # URL to fetch from; can be looped over / crawled for multiple URLs
    driver.get('https://nike.co.in')
    driver.find_element_by_xpath('//*[@id="VisualSearchInput"]').send_keys(shop_by_category, Keys.ENTER)
    content = driver.page_source
    soup = BeautifulSoup(content, features="lxml")
    # Parsing content
    for div in soup.findAll('div', attrs={'class': 'product-card__body'}):
        name = div.find('div', attrs={'class': 'product-card__title'})
        price = div.find('div', attrs={'class': 'product-price css-11s12ax is-current-price'})
        subtitle = div.find('div', attrs={'class': 'product-card__subtitle'})
        color = div.find('div', attrs={'class': 'product-card__product-count'})
        titles.append(name.text)
        prices.append(price.text)
        category.append(subtitle.text)
        colors.append(color.text)
    # Storing scraped content
    df = pd.DataFrame({'Product Name': titles, 'Price': prices, 'Category': category, 'Colors': colors})
    df.to_csv('adidas.csv', index=False, encoding='utf-8')


if __name__ == '__main__':
    category_name_list = ['running']
    for category in category_name_list:
        adidas(category)
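If it matters, I suspect the BeautifulSoup version reads driver.page_source before the search results have rendered, so I am planning to test a variant that waits before parsing. This is only a sketch; the wait condition and the product-card__body class name are my assumptions, not something I have verified against the page.

from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

# Sketch: wait for at least one product card before grabbing the page source,
# then parse as before. "product-card__body" is the same unverified class name
# used in the loop above.
wait = WebDriverWait(driver, 20)
wait.until(EC.presence_of_element_located(
    (By.XPATH, '//div[contains(@class, "product-card__body")]')))
content = driver.page_source
soup = BeautifulSoup(content, features="lxml")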