I am trying to Loop through this list of news articles on the html data table and grab all the stories. The only way that seems possible is to click on each title and then it loads on the right side. I can get it to work but when I try to loop through them, it skips around and does some but not all. I have tried putting time.sleeps in and explicit sleeps but nothing has remedied it yet. Any help is appreciated!
I have tried putting time.sleeps in and explicit sleeps but nothing has remedied it yet. I have also looked into other ways to extract the story data on the right side but haven't found a good way to get besides clicking on each Headline link on the left side under the date column: Today April 17
import requests
from bs4 import BeautifulSoup
from io import BytesIO
from requests.auth import HTTPBasicAuth
import pandas as pd
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.by import By
from selenium.webdriver.support.wait import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.action_chains import ActionChains
import time
STOCK = 'MARA'
#ETRADE START
PATH = "D:\chromedriver.exe"
driver = webdriver.Chrome(PATH)
driver.get(f"https://www.etrade.wallst.com/v1/stocks/news/search_results.asp?symbol= {STOCK} &rsO=new") #URL to be scraped
#print(driver.title)
try: #wait for elements to load before proceding
search = WebDriverWait(driver, 10).until(EC.presence_of_element_located((By.ID, "news_headlines")))
Table = search.find_element(By.CSS_SELECTOR, "table")
Rows = Table.find_elements(By.CSS_SELECTOR, 'tr')
rowcount = 0
for row in Rows:
rowcount = rowcount + 1
table_data = row.find_element(By.CSS_SELECTOR, 'td')
try:
Title1 = table_data.find_element(By.XPATH, f'//*[@id="news_headlines"]/table/tbody/tr[{rowcount}]/td/a')
print(Title1.text)
try: #try to click title
testclick = WebDriverWait(table_data, 10).until(EC.element_to_be_clickable(Title1))
time.sleep(2)
# Click_element = table_data.find_element(By.CSS_SELECTOR, 'a').click()
testclick.click()
except:
print("cant click title")
except:
TitleDate = table_data.find_element(By.XPATH, f'//*[@id="news_headlines"]/table/tbody/tr[{rowcount}]/td')
print(TitleDate.text)
if rowcount == 15:
break
finally:
driver.quit()