so i 'm trying with this code to retrieve some informations (Inchikey and SMILES) from lipids database but my code crash after the first itteration with this error: ---------------------------------------------------------------------------
StaleElementReferenceException Traceback (most recent call last)
in <cell line: 35>() 35 for row in rows: 36 # Get the elements of each row ---> 37 cells = row.find_elements('tag name', 'td') 38 39 # Check if the row contains enough cells
3 frames
/usr/local/lib/python3.10/dist-packages/selenium/webdriver/remote/errorhandler.py in check_response(self, response) 243 alert_text = value["alert"].get("text") 244 raise exception_class(message, screen, stacktrace, alert_text) # type: ignore[call-arg] # mypy is not smart enough here --> 245 raise exception_class(message, screen, stacktrace)
StaleElementReferenceException: Message: stale element reference: element is not attached to the page document (Session info: headless chrome=90.0.4430.212); For documentation on this error, please visit: https://www.selenium.dev/documentation/webdriver/troubleshooting/errors#stale-element-reference-exception
Here is my code:
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.common.exceptions import NoSuchElementException, StaleElementReferenceException
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
import time
def web_driver():
options = Options()
options.add_argument("--verbose")
options.add_argument('--no-sandbox')
options.add_argument('--headless')
options.add_argument('--disable-gpu')
options.add_argument('--disable-dev-shm-usage')
return webdriver.Chrome(options=options)
# Create an instance of the WebDriver using the configured browser options
driver = web_driver()
# Open the website URL in the browser
url = 'https://www.swisslipids.org/#/browse_tree?entity_id=SLM:000389800'
driver.get(url)
# Interact with the elements on the page to extract the desired data
# Find the table element
table_element = driver.find_element('css selector', '.table')
# Iterate through the table rows, excluding the first header row
rows = table_element.find_elements('tag name', 'tr')[1:]
count = 0 # Counter initialized to 0
for row in rows:
# Get the elements of each row
cells = row.find_elements('tag name', 'td')
# Check if the row contains enough cells
if len(cells) >= 2:
# Extract the data from each cell
identifiant_lipid = cells[0].text.strip()
lipid_name = cells[1].text.strip()
# Generate the specific link for each lipid ID by combining it with the base URL
lipid_link = f'https://www.swisslipids.org/#/entity/{identifiant_lipid}/'
count += 1 # Increment the counter
print("ID lipid:", identifiant_lipid)
print("Nom lipide:", lipid_name)
print("Lien:", lipid_link)
# Open the specific link for each lipid ID
driver.get(lipid_link)
time.sleep(1) # Pause for page load
try:
# Explicitly wait for the chemInfo element to be clickable
chem_info_element = WebDriverWait(driver, 10).until(
EC.element_to_be_clickable((By.CSS_SELECTOR, '#chemInfo > fieldset:nth-child(1) > div:nth-child(3)'))
)
# Find all the dt and dd elements within the chemInfo element
dt_elements = chem_info_element.find_elements('tag name', 'dt')
dd_elements = chem_info_element.find_elements('tag name', 'dd')
# Extract the desired information based on the index of the dt element
for index, dt_element in enumerate(dt_elements):
dt_text = dt_element.text.strip()
dd_element = dd_elements[index]
if dt_text == 'InChI key':
inchi_key = dd_element.text.strip().replace('InChIKey=', '')
print("InChI Key:", inchi_key)
if dt_text == 'SMILES':
smiles = dd_element.text.strip()
print("SMILES:", smiles)
except (NoSuchElementException, StaleElementReferenceException) as e:
print("Informations chimiques non trouvées")
print("Exception:", e)
print() # Empty line for better readability
# Print the total number of lipid IDs
print("Nombre total d'ID lipid:", count)
# Close the browser
driver.quit()
Ps: Actually, it does print the right answer but just for the first itteration and then crash;
Here is the print for the first itteration:
ID lipid: SLM:000000510 Nom lipide: hexadecanoate Lien: https://www.swisslipids.org/#/entity/SLM:000000510/ InChI Key: IPCSVZSSVZVIGE-UHFFFAOYSA-M SMILES: CCCCCCCCCCCCCCCC([O-])=O
Thanks in advance
i try every kind of code i saw here, and what i want is to be able to retrieve InChI Key and SMILES informations for each lipid_link.