To collect all the data, you will need to use the BeautifulSoup library for parsing. To begin with, to reduce the number of iterations, display 100 elements per page. To do this, click on the drop-down list, and then on the option 100.
Then, in a loop, we iterate through all the found elements and print them, or save them — it's up to you. At each iteration, we find the NEXT button and click on it to load new elements. I left comments in the code to explain the process.
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.support.wait import WebDriverWait
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
import time
# Scrape paginated marathon results: select 100 rows per page, then walk
# every page with the NEXT button, parsing each page with BeautifulSoup.
url = 'http://live.xacte.com/lamarathon/'

chrome_options = Options()
# BUG FIX: the original built a second options object
# (`options = webdriver.ChromeOptions()`), added the arguments to it, but
# passed the untouched `chrome_options` to the driver — so headless and
# --no-sandbox were never actually applied. Add the arguments to the object
# that is really handed to webdriver.Chrome.
chrome_options.add_argument('--headless')
chrome_options.add_argument('--no-sandbox')
driver = webdriver.Chrome(options=chrome_options,
                          executable_path=".../chromedriver_linux64/chromedriver")  # the path to your chromedriver
driver.get(url)

wait = WebDriverWait(driver, 10)
# Wait until the page-size drop-down is rendered before interacting with it.
wait.until(EC.visibility_of_element_located((By.NAME, "xact_results_search_results_length")))
driver.find_element(By.NAME, "xact_results_search_results_length").click()  # open the drop-down list
for option in driver.find_elements(By.TAG_NAME, "option"):
    if option.text == '100':
        option.click()  # select the "100 rows per page" option
        break
time.sleep(2)  # waiting for page loading

# Column labels in the order the <td> cells appear in each result row.
FIELDS = ('Bib', 'Name', 'Sex', 'Age', 'City', 'Country',
          'Net', 'Clock', 'Pace', 'Event')

for i in range(272):  # number of iterations = number of data pages
    time.sleep(1)  # waiting for page loading
    soup = BeautifulSoup(driver.page_source, 'html.parser')
    tbody = soup.find('tbody', role='alert')  # table body holding the result rows
    trs = tbody.find_all('tr')
    for tr in trs:
        tds = tr.find_all('td')
        # zip() pairs each cell with its label and stops at the shorter
        # sequence, so a row with missing cells no longer raises IndexError
        # (the original indexed tds[0]..tds[9] unconditionally).
        for label, td in zip(FIELDS, tds):
            print(f'{label} - {td.text}')
        print('Next------------------------------------')  # for clarity
    print(f'Iter - {i}, Number of elements per page - {len(trs)}')  # iteration number and rows collected
    wait.until(EC.element_to_be_clickable((By.ID, "xact_results_search_results_next")))
    driver.find_element(By.ID, "xact_results_search_results_next").click()  # click the Next button
Output:
......
Bib - 5806
Name - Zulma Castaneda
Sex - F
Age - 32
City - Foothill Ranch, CA
Country - USA
Net - 5:36:38
Clock - 5:50:27
Pace - 12:50/mi
Event - Marathon
Next------------------------------------
Iter - 270, Number of elements per page - 100
Bib - 4226
Name - Zvi Donat
Sex - M
Age - 44
City - Natanya
Country - ISR
Net -
Clock -
Pace -
Event - Marathon
Next------------------------------------
Bib - 55448
Name - Zvi Donat
Sex - M
Age - 44
City - Natanya
Country -
Net -
Clock -
Pace -
Event - 5K
Next------------------------------------
Iter - 271, Number of elements per page - 2