I am currently trying to write a stock information script. However, I am only able to retrieve data from each stock's summary page, not from the key statistics page. This is the page I am trying to obtain data from: https://au.finance.yahoo.com/quote/TICKER/key-statistics?p=TICKER
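For reference, the script builds that URL by substituting each ticker into the pattern above, e.g. (a minimal illustration, using AAPL purely as an example ticker; url2 is the same variable used in the script below):

ticker = "AAPL"
url2 = "https://au.finance.yahoo.com/quote/{0}/key-statistics?p={0}".format(ticker)
# -> https://au.finance.yahoo.com/quote/AAPL/key-statistics?p=AAPL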
Here is the code I am using (mostly adapted from a video I watched):
# -*- coding: utf-8 -*-
import os, sys
import csv
from bs4 import BeautifulSoup
import urllib3
import xlsxwriter
from selenium import webdriver
from selenium.webdriver.chrome.options import Options

key_stats_on_main = ['Market cap', 'PE ratio (TTM)', 'EPS (TTM)']
key_stats_on_stat = ['Enterprise value', 'Trailing P/E', 'Forward P/E',
                     'PEG Ratio (5-yr expected)', 'Return on assets',
                     'Quarterly revenue growth (yoy)', 'EBITDA', 'Diluted EPS (ttm)',
                     'Total debt/equity (mrq)', 'Current ratio (mrq)']

# Read the tickers from the portfolio CSV (first column of each row).
stocks_arr = []
pfolio_file = open("/Users/z_hutcho/Documents/Programming/yfinance/stocks.csv", "r")
for line in pfolio_file:
    indv_stock_arr = line.strip().split(',')
    stocks_arr.append(indv_stock_arr)
print(stocks_arr)

# Headless Chrome so the JavaScript-rendered pages are loaded before parsing.
chrome_options = Options()
chrome_options.add_argument("--headless")
chrome_options.add_argument("--window-size=2560x1600")
chrome_driver = "/usr/local/bin/chromedriver"
browser = webdriver.Chrome(options=chrome_options, executable_path=chrome_driver)

stock_info_arr = []

for stock in stocks_arr:
    stock_info = []
    ticker = stock[0]
    stock_info.append(ticker)
    url = "https://au.finance.yahoo.com/quote/{0}?p={0}".format(ticker)
    url2 = "https://au.finance.yahoo.com/quote/{0}/key-statistics?p={0}".format(ticker)

    # Summary page: this part works.
    browser.get(url)
    innerHTML = browser.execute_script("return document.body.innerHTML")
    soup = BeautifulSoup(innerHTML, 'html.parser')
    for stat in key_stats_on_main:
        page_stat1 = soup.find(text=stat)
        try:
            page_row1 = page_stat1.find_parent('tr')
            try:
                page_statnum1 = page_row1.find_all('span')[1].contents[1].get_text(strip=True)
                print(page_statnum1)
            except:
                page_statnum1 = page_row1.find_all('td')[1].contents[0].get_text(strip=True)
                print(page_statnum1)
        except:
            print('Invalid parent for this element')
            page_statnum1 = "N/A"
        stock_info.append(page_statnum1)

    # Key statistics page: this is the part that fails.
    browser.get(url2)
    innerHTML2 = browser.execute_script("return document.body.innerHTML")
    soup2 = BeautifulSoup(innerHTML2, 'html.parser')
    for stat in key_stats_on_stat:
        page_stat2 = soup2.find(text=stat)
        try:
            page_row2 = page_stat2.find_parent('tr')
            try:
                page_statnum2 = page_row2.find_all('span')[1].contents[1].get_text(strip=True)
                print(page_statnum2)
            except:
                page_statnum2 = page_row2.find_all('td')[1].contents[0].get_text(strip=True)
                print(page_statnum2)
        except:
            print('Invalid parent for this element')
            page_statnum2 = 'N/A'
        stock_info.append(page_statnum2)

    stock_info_arr.append(stock_info)

print(stock_info_arr)
The results from this are as follows for the stock ticker AAPL:
1.3T
24.52
11.89
Invalid parent for this element
Invalid parent for this element
Invalid parent for this element
Invalid parent for this element
Invalid parent for this element
Invalid parent for this element
Invalid parent for this element
Invalid parent for this element
Invalid parent for this element
Invalid parent for this element
I'm not sure why the second page isn't being scraped correctly. I am not very experienced with BeautifulSoup, so any help would be much appreciated.
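In case it is useful for diagnosing this, here is a minimal standalone snippet (separate from the script above, reusing the same headless Chrome setup and AAPL as an example ticker) that just loads the key-statistics page and prints every row label BeautifulSoup sees, so the exact text can be compared against the strings in key_stats_on_stat:

from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.chrome.options import Options

chrome_options = Options()
chrome_options.add_argument("--headless")
browser = webdriver.Chrome(options=chrome_options,
                           executable_path="/usr/local/bin/chromedriver")

# Load only the key-statistics page for one ticker.
browser.get("https://au.finance.yahoo.com/quote/AAPL/key-statistics?p=AAPL")
innerHTML = browser.execute_script("return document.body.innerHTML")
soup = BeautifulSoup(innerHTML, 'html.parser')

# Print the first cell of each table row so the exact label text is visible.
for row in soup.find_all('tr'):
    cells = row.find_all('td')
    if cells:
        print(repr(cells[0].get_text(strip=True)))

browser.quit()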