1

I was trying to work on a personal project (stock market predictions) for school, when Google started acting up again...

I realize that Google Finance has been complete garbage this past year, but it still seemed to be working somewhat up until this morning. I got an error the first time I ran the code even though it worked fine yesterday.

So I tried just running a sample code from the actual library page: https://pypi.org/project/googlefinance.client/

!pip install googlefinance.client

from googlefinance.client import get_price_data, get_prices_data, get_prices_time_data

# Example 1: request one year of daily prices for a single index.
# Dow Jones
param = {
    'q': ".DJI", # Stock symbol (ex: "AAPL")
    'i': "86400", # Interval size in seconds ("86400" = 1 day intervals)
    'x': "INDEXDJX", # Stock exchange symbol on which stock is traded (ex: "NASD")
    'p': "1Y" # Period (Ex: "1Y" = 1 year)
}
# get price data (return pandas dataframe)
df = get_price_data(param)
print(df)

# Example 2: request several indices in one call. Each entry gives only the
# symbol ('q') and the exchange ('x'); the period is passed separately below.
params = [
    # Dow Jones
    {
        'q': ".DJI",
        'x': "INDEXDJX",
    },
    # NYSE COMPOSITE (DJ)
    {
        'q': "NYA",
        'x': "INDEXNYSEGIS",
    },
    # S&P 500
    {
        'q': ".INX",
        'x': "INDEXSP",
    }
]
period = "1Y"
# get open, high, low, close, volume data (return pandas dataframe)
df = get_prices_data(params, period)
print(df)

and still got

---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
<ipython-input-2-df3429694fd0> in <module>()
      9 }
     10 # get price data (return pandas dataframe)
---> 11 df = get_price_data(param)
     12 print(df)
     13 

/usr/local/lib/python3.6/dist-packages/googlefinance/client.py in get_price_data(query)
     13                 cols = price.split(",")
     14                 if cols[0][0] == 'a':
---> 15                         basetime = int(cols[0][1:])
     16                         index.append(datetime.fromtimestamp(basetime))
     17                         data.append([float(cols[4]), float(cols[2]), float(cols[3]), float(cols[1]), int(cols[5])])

ValueError: invalid literal for int() with base 10: 'nd&nbsp;...</span><br></div></div><div class="g"><h3 class="r"><a href="/url?q=https://en.wikipedia.org/wiki/DJI_(company)&amp;sa=U&amp;ved=0ahUKEwiB-e_gjMzcAhUpwlkKHTTUC74QFghGMAw&amp;usg=AOvVaw1ugw

Has anyone run into this before and knows what's wrong or how to fix it?

Or, on a separate note, does anyone know of a good alternative to Google Finance?

Raksha
  • 1,572
  • 2
  • 28
  • 53

3 Answers

1

It was a problem with the example code. If you go to the GitHub Homepage, you'll get the latest version—even the small updates.

I slightly modified client.py and had no problems with the output.

#!/usr/bin/env python
# coding: utf-8
import requests
from datetime import datetime
import pandas as pd


def get_price_data(query):
    """Fetch open/high/low/close/volume rows for a single query.

    Args:
        query: Mapping of request parameters sent unchanged to the
            ``getprices`` endpoint. ``query['i']`` (interval in seconds) is
            read whenever the response contains offset rows.

    Returns:
        A pandas DataFrame indexed by ``datetime`` with the columns
        ``Open``, ``High``, ``Low``, ``Close`` and ``Volume``.

    Raises:
        ValueError: If a row that starts like a price row does not contain
            valid numbers (for example when an HTML page is returned).
        IndexError: If a price row has fewer than six comma-separated fields.
        requests.exceptions.Timeout: If the server does not answer in time.
    """
    # A timeout keeps the call from hanging forever on a stalled connection.
    r = requests.get(
        "https://finance.google.com/finance/getprices", params=query,
        timeout=30)
    lines = r.text.splitlines()
    data = []
    index = []
    basetime = 0
    for price in lines:
        cols = price.split(",")
        stamp = cols[0]
        if stamp.startswith('a'):
            # Absolute row: 'a' followed by a timestamp that later offset
            # rows are measured from.
            basetime = int(stamp[1:])
            date = basetime
        elif stamp[:1].isdigit():
            # Offset row: number of intervals elapsed since the base time.
            date = basetime + (int(stamp) * int(query['i']))
        else:
            # Not a price row. Slicing with [:1] (instead of indexing [0])
            # also makes blank lines land here rather than raise IndexError.
            continue
        index.append(datetime.fromtimestamp(date))
        # The response fields are reordered into Open, High, Low, Close,
        # Volume to match the column names below.
        data.append([float(cols[4]), float(cols[2]), float(cols[3]),
                     float(cols[1]), int(cols[5])])
    return pd.DataFrame(data, index=index, columns=['Open', 'High', 'Low', 'Close', 'Volume'])


def get_closing_data(queries, period):
    """Fetch daily closing prices for several queries.

    Args:
        queries: Iterable of parameter mappings; each must contain ``'q'``
            (used as the column name). The mappings are not modified.
        period: Period string sent as the ``'p'`` request parameter.

    Returns:
        A pandas DataFrame with one column per query (named after
        ``query['q']``), indexed by ``date``. When several rows fall on the
        same date only the last one is kept.

    Raises:
        ValueError: If a price row does not contain valid numbers, or if
            ``queries`` is empty (nothing to concatenate).
        requests.exceptions.Timeout: If the server does not answer in time.
    """
    closing_data = []
    for query in queries:
        # Build the request parameters on a copy so the caller's dict is not
        # silently changed by adding 'i' and 'p' to it.
        params = dict(query, i=86400, p=period)
        r = requests.get(
            "https://finance.google.com/finance/getprices", params=params,
            timeout=30)
        lines = r.text.splitlines()
        data = []
        index = []
        basetime = 0
        for price in lines:
            cols = price.split(",")
            stamp = cols[0]
            if stamp.startswith('a'):
                # Absolute row: 'a' followed by the base timestamp.
                basetime = int(stamp[1:])
                date = basetime
            elif stamp[:1].isdigit():
                # Offset row: whole intervals elapsed since the base time.
                date = basetime + (int(stamp) * int(params['i']))
            else:
                # Not a price row; [:1] also lets blank lines fall through
                # here instead of raising IndexError.
                continue
            data.append(float(cols[1]))
            index.append(datetime.fromtimestamp(date).date())
        s = pd.Series(data, index=index, name=query['q'])
        closing_data.append(s[~s.index.duplicated(keep='last')])
    return pd.concat(closing_data, axis=1)


def get_open_close_data(queries, period):
    """Fetch daily open and close prices for several queries.

    Args:
        queries: Iterable of parameter mappings; each must contain ``'q'``
            (used as the column-name prefix). The mappings are not modified.
        period: Period string sent as the ``'p'`` request parameter.

    Returns:
        A pandas DataFrame indexed by ``date`` with the columns
        ``<q>_Open`` and ``<q>_Close`` for every query. When several rows
        fall on the same date only the last one is kept. An empty DataFrame
        is returned when ``queries`` is empty.

    Raises:
        ValueError: If a price row does not contain valid numbers.
        IndexError: If a price row has fewer than five fields.
        requests.exceptions.Timeout: If the server does not answer in time.
    """
    open_close_data = pd.DataFrame()
    for query in queries:
        # Build the request parameters on a copy so the caller's dict is not
        # silently changed by adding 'i' and 'p' to it.
        params = dict(query, i=86400, p=period)
        r = requests.get(
            "https://finance.google.com/finance/getprices", params=params,
            timeout=30)
        lines = r.text.splitlines()
        data = []
        index = []
        basetime = 0
        for price in lines:
            cols = price.split(",")
            stamp = cols[0]
            if stamp.startswith('a'):
                # Absolute row: 'a' followed by the base timestamp.
                basetime = int(stamp[1:])
                date = basetime
            elif stamp[:1].isdigit():
                # Offset row: whole intervals elapsed since the base time.
                date = basetime + (int(stamp) * int(params['i']))
            else:
                # Not a price row; [:1] also lets blank lines fall through
                # here instead of raising IndexError.
                continue
            data.append([float(cols[4]), float(cols[1])])
            index.append(datetime.fromtimestamp(date).date())
        df = pd.DataFrame(data, index=index, columns=[
                          query['q'] + '_Open', query['q'] + '_Close'])
        open_close_data = pd.concat(
            [open_close_data, df[~df.index.duplicated(keep='last')]], axis=1)
    return open_close_data


def get_prices_data(queries, period):
    """Fetch daily open/high/low/close/volume data for several queries.

    Args:
        queries: Iterable of parameter mappings; each must contain ``'q'``
            (used as the column-name prefix). The mappings are not modified.
        period: Period string sent as the ``'p'`` request parameter.

    Returns:
        A pandas DataFrame indexed by ``date`` with the columns
        ``<q>_Open``, ``<q>_High``, ``<q>_Low``, ``<q>_Close`` and
        ``<q>_Volume`` for every query. When several rows fall on the same
        date only the last one is kept. An empty DataFrame is returned when
        ``queries`` is empty.

    Raises:
        ValueError: If a price row does not contain valid numbers.
        IndexError: If a price row has fewer than six fields.
        requests.exceptions.Timeout: If the server does not answer in time.
    """
    prices_data = pd.DataFrame()
    for query in queries:
        # Build the request parameters on a copy so the caller's dict is not
        # silently changed by adding 'i' and 'p' to it.
        params = dict(query, i=86400, p=period)
        r = requests.get(
            "https://finance.google.com/finance/getprices", params=params,
            timeout=30)
        lines = r.text.splitlines()
        data = []
        index = []
        basetime = 0
        for price in lines:
            cols = price.split(",")
            stamp = cols[0]
            if stamp.startswith('a'):
                # Absolute row: 'a' followed by the base timestamp.
                basetime = int(stamp[1:])
                date = basetime
            elif stamp[:1].isdigit():
                # Offset row: whole intervals elapsed since the base time.
                date = basetime + (int(stamp) * int(params['i']))
            else:
                # Not a price row; [:1] also lets blank lines fall through
                # here instead of raising IndexError.
                continue
            # Reordered into Open, High, Low, Close, Volume to match the
            # column names below.
            data.append([float(cols[4]), float(cols[2]), float(cols[3]),
                         float(cols[1]), int(cols[5])])
            index.append(datetime.fromtimestamp(date).date())
        df = pd.DataFrame(data, index=index, columns=[
                          query['q'] + '_Open', query['q'] + '_High', query['q'] + '_Low', query['q'] + '_Close', query['q'] + '_Volume'])
        prices_data = pd.concat(
            [prices_data, df[~df.index.duplicated(keep='last')]], axis=1)
    return prices_data


def get_prices_time_data(queries, period, interval):
    """Fetch open/high/low/close/volume data at a custom interval.

    Args:
        queries: Iterable of parameter mappings; each must contain ``'q'``
            (used as the column-name prefix). The mappings are not modified.
        period: Period string sent as the ``'p'`` request parameter.
        interval: Interval in seconds, sent as the ``'i'`` request parameter
            and used to turn offset rows into timestamps.

    Returns:
        A pandas DataFrame indexed by ``datetime`` with the columns
        ``<q>_Open``, ``<q>_High``, ``<q>_Low``, ``<q>_Close`` and
        ``<q>_Volume`` for every query. Rows with a duplicated timestamp are
        reduced to the last one. An empty DataFrame is returned when
        ``queries`` is empty.

    Raises:
        ValueError: If a price row does not contain valid numbers.
        IndexError: If a price row has fewer than six fields.
        requests.exceptions.Timeout: If the server does not answer in time.
    """
    prices_time_data = pd.DataFrame()
    for query in queries:
        # Build the request parameters on a copy so the caller's dict is not
        # silently changed by adding 'i' and 'p' to it.
        params = dict(query, i=interval, p=period)
        r = requests.get(
            "https://finance.google.com/finance/getprices", params=params,
            timeout=30)
        lines = r.text.splitlines()
        data = []
        index = []
        basetime = 0
        for price in lines:
            cols = price.split(",")
            stamp = cols[0]
            if stamp.startswith('a'):
                # Absolute row: 'a' followed by the base timestamp.
                basetime = int(stamp[1:])
                date = basetime
            elif stamp[:1].isdigit():
                # Offset row: whole intervals elapsed since the base time.
                date = basetime + (int(stamp) * int(params['i']))
            else:
                # Not a price row; [:1] also lets blank lines fall through
                # here instead of raising IndexError.
                continue
            # Reordered into Open, High, Low, Close, Volume to match the
            # column names below.
            data.append([float(cols[4]), float(cols[2]), float(cols[3]),
                         float(cols[1]), int(cols[5])])
            index.append(datetime.fromtimestamp(date))
        df = pd.DataFrame(data, index=index, columns=[
                          query['q'] + '_Open', query['q'] + '_High', query['q'] + '_Low', query['q'] + '_Close', query['q'] + '_Volume'])
        prices_time_data = pd.concat(
            [prices_time_data, df[~df.index.duplicated(keep='last')]], axis=1)
    return prices_time_data

Snippet

# Request one year of daily Dow Jones prices and print the resulting frame.
params = {
    'q': ".DJI",  # Stock symbol (ex: "AAPL")
    'i': "86400",  # Interval size in seconds ("86400" = 1 day intervals)
    # Stock exchange symbol on which stock is traded (ex: "NASD")
    'x': "INDEXDJX",
    'p': "1Y"  # Period (Ex: "1Y" = 1 year)
}
df = get_price_data(params)
print(df)

Output

Volume Open High ... Close
328405532 2017-08-01 15:00:00 21961.42 21990.96 ... 21963.92
328405532 2017-08-02 15:00:00 22004.36 22036.10 ... 22016.24
336824836 2017-08-03 15:00:00 22007.58 22044.85 ... 22026.10
278731064 2017-08-04 15:00:00 22058.39 22092.81 ... 22092.81
253635270 2017-08-07 15:00:00 22100.20 22121.15 ... 22118.42
213012378 2017-08-08 15:00:00 22095.14 22179.11 ... 22085.34

Noah M.
  • 310
  • 1
  • 8
  • Sorry for not following up. Did you figure it out? If not, I'm sure there's another solution. – Noah M. Aug 09 '18 at 06:08
  • No, I switched to Yahoo finance :\ ... They both work sporadically, but Yahoo seems a bit more consistent. – Raksha Aug 09 '18 at 15:02
0

For the last 48 hours or so ".INX" has not been updating on my google sheets. .DJI and .IXIC are still updating, although I think one of them wasn't for a while recently.

mrk
  • 8,059
  • 3
  • 56
  • 78
0

When scraping Google Finance, you don't really need an API unless you want one. The BeautifulSoup web scraping library is enough: with it you can scrape all the information you need, which is pretty much everything on the ticker page.

Check code in online IDE.



from bs4 import BeautifulSoup
import requests, lxml, json
from itertools import zip_longest


def scrape_google_finance(ticker: str) -> dict:
    """Scrape the Google Finance quote page of a ticker.

    Args:
        ticker: Ticker identifier placed in the quote URL, for example
            ``"GOOGL:NASDAQ"``.

    Returns:
        A dict with two sub-dicts: ``"ticker_info"`` (``title`` and
        ``current_price``; a value is ``None`` when its element is not found
        on the page) and ``"right_panel_data"`` (label/value pairs, with
        labels lower-cased and spaces replaced by underscores).
    """
    # https://docs.python-requests.org/en/master/user/quickstart/#passing-parameters-in-urls
    params = {
        "hl": "en" # language
        }

    # https://docs.python-requests.org/en/master/user/quickstart/#custom-headers
    # https://www.whatismybrowser.com/detect/what-is-my-user-agent
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/100.0.4896.60 Safari/537.36",
        }

    html = requests.get(f"https://www.google.com/finance/quote/{ticker}", params=params, headers=headers, timeout=30)
    soup = BeautifulSoup(html.text, "lxml")

    def text_or_none(selector):
        # select_one() returns None when nothing matches, so guard before
        # reading .text instead of failing with AttributeError.
        node = soup.select_one(selector)
        return node.text if node is not None else None

    ticker_data = {"right_panel_data": {},
                    "ticker_info": {}}

    ticker_data["ticker_info"]["title"] = text_or_none(".zzDege")
    ticker_data["ticker_info"]["current_price"] = text_or_none(".AHmHk .fxKbKc")

    right_panel_keys = soup.select(".gyFHrc .mfs7Fc")
    right_panel_values = soup.select(".gyFHrc .P6K39c")

    # zip() stops at the shorter list. zip_longest() would pad the shorter
    # one with None and crash on `.text` whenever the two selectors match a
    # different number of elements.
    for key, value in zip(right_panel_keys, right_panel_values):
        key_value = key.text.lower().replace(" ", "_")

        ticker_data["right_panel_data"][key_value] = value.text

    return ticker_data
    

# Scrape the quote page for the GOOGL:NASDAQ ticker.
data = scrape_google_finance(ticker="GOOGL:NASDAQ")

# Pretty-print the collected fields as indented JSON.
print(json.dumps(data, indent=2))

Example output

{
  "right_panel_data": {
    "previous_close": "$118.84",
    "day_range": "$119.46 - $120.56",
    "year_range": "$101.88 - $151.55",
    "market_cap": "1.57T USD",
    "avg_volume": "34.44M",
    "p/e_ratio": "22.76",
    "dividend_yield": "-",
    "primary_exchange": "NASDAQ",
    "ceo": "Sundar Pichai",
    "founded": "Oct 2, 2015",
    "headquarters": "Mountain View, CaliforniaUnited States",
    "website": "abc.xyz",
    "employees": "174,014"
  },
  "ticker_info": {
    "title": "Alphabet Inc Class A",
    "current_price": "$120.11"
  }
}

There's a blog post, "Scrape Google Finance Ticker Quote Data in Python", if you need to scrape more data from Google Finance.

Denis Skopa
  • 1
  • 1
  • 1
  • 7