I'm a beginner in Python, so please be patient with me.
I want to extract some simple data from an array of URLs. All the URLs HTML-Contents have the same structur, so extracting the data by using a for-loop works out fine.
I use Selenium, because I found out, that the Websites JavaScript changes the initial HTML-Code and I want the final HTML-Code to work on.
For every iteration it takes around 4 seconds, which is accummulated a lot of time. I already found out, that wait.until(page_has_loaded) alone takes half of the time of the code.
import win32com.client as win32
import requests
import openpyxl
import time
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.ui import WebDriverWait
headers = {"User-Agent": 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/110.0.0.0 Safari/537.36'}
driver_path = 'C:\\webdrivers\\chromedriver.exe'
dir = "C:\\Users\\Me\\OneDrive\\Dokumente_\\Notizen\\CSGOItems\\CSGOItems.xlsx"
workbook = openpyxl.load_workbook(dir)
sheet1 = workbook["Tabelle1"]
sheet2 = workbook["AllPrices"]
URLSkinBit = [
'https://skinbid.com/auctions?mh=Gamma%20Case&sellType=fixed_price&skip=0&take=30&sort=price%23asc&ref=csgoskins',
'https://skinbid.com/listings?popular=false&goodDeals=false&sort=price%23asc&take=10&skip=0&search=gamma&sellType=all',
'https://skinbid.com/auctions?mh=Danger%20Zone%20Case&sellType=fixed_price&skip=0&take=30&sort=price%23asc&ref=csgoskins',
'https://skinbid.com/auctions?mh=Dreams%20%26%20Nightmares%20Case&sellType=fixed_price&skip=0&take=30&sort=price%23asc&ref=csgoskins',
'https://skinbid.com/listings?popular=false&goodDeals=false&sort=price%23asc&take=10&skip=0&search=vanguard&sellType=all',
'https://skinbid.com/listings?popular=false&goodDeals=false&sort=price%23asc&take=10&skip=0&search=chroma%203&sellType=all',
'https://skinbid.com/listings?popular=false&goodDeals=false&sort=price%23asc&take=10&skip=0&search=spectrum%202&sellType=all',
'https://skinbid.com/listings?popular=false&goodDeals=false&sort=price%23asc&take=10&skip=0&search=clutch&sellType=all',
'https://skinbid.com/listings?popular=false&goodDeals=false&sort=price%23asc&take=10&skip=0&search=snakebite&sellType=all',
'https://skinbid.com/listings?popular=false&goodDeals=false&sort=price%23asc&take=10&skip=0&search=falchion&sellType=all',
'https://skinbid.com/listings?popular=false&goodDeals=false&sort=price%23asc&take=10&skip=0&search=fracture&sellType=all',
'https://skinbid.com/listings?popular=false&goodDeals=false&sort=price%23asc&take=10&skip=0&search=prisma%202&sellType=all',
]
def page_has_loaded(driver):
return driver.execute_script("return document.readyState") == "complete"
def SkinBitPrices():
global count3
count3 = 0
with webdriver.Chrome(executable_path=driver_path) as driver:
for url in URLSkinBit:
driver.get(url)
wait = WebDriverWait(driver, 10)
wait.until(page_has_loaded)
html = driver.page_source
soup = BeautifulSoup(html, 'html.parser')
container = soup.find('div', {'class': 'price'}).text
price = float(container.replace(' € ', ''))
print("%.2f" % price)
#Edit Excel-File
cell = str(3 + count3)
sheet2['B' + cell] = price
count3 += 1
driver.quit()
workbook.save(dir)
workbook.close()
return
SkinBitPrices()
Do you see possibilities to improve the performance here?
Thanks alot.