I am using slightly edited code from Daniel Rodriguez to pull all NBA box score data from 2014. There are two earlier parts to the script: the first grabs all of the team names, and the second grabs every game for those teams (ESPN game id, date, home team, home score, away team, and away score). Both of those portions work fine.
Then I run the part that grabs the box score data for each game id. It works for a chunk of the games and then stops on a game, seemingly at random, with the error:

AttributeError: 'NoneType' object has no attribute 'find_all'

I say seemingly at random because I have run the same code over and over and it never stops on the same box score; it errors out on a different box score every time.
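My understanding is that BeautifulSoup's find() returns None when nothing matches, so the traceback should just mean the 'mod-data' table is missing from whatever page came back. A minimal illustration of that failure mode (not my actual code, just a toy page with no matching table):

from bs4 import BeautifulSoup

soup = BeautifulSoup('<html><body></body></html>')
table = soup.find('table', class_='mod-data')   # no such table, so find() returns None
table.find_all('thead')                         # AttributeError: 'NoneType' object has no attribute 'find_all'

What I don't understand is why the same game id parses fine on one run and comes back without the table on the next.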
Here is the code (the line marked with ** is where the error happens):
import numpy as np
import pandas as pd
import requests
import time
from bs4 import BeautifulSoup
import os

os.chdir('C:\Users\steven2r\Documents\Python')

games = pd.read_csv('games.csv').set_index('id')

BASE_URL = 'http://espn.go.com/nba/boxscore?gameId={0}'

# use the first game to pull the stat column headers
request = requests.get(BASE_URL.format(games.index[0]))
table = BeautifulSoup(request.text).find('table', class_='mod-data')
heads = table.find_all('thead')
headers = heads[0].find_all('tr')[1].find_all('th')[1:]
headers = [th.text for th in headers]
columns = ['id', 'team', 'player'] + headers

bad_downloads = []
players = pd.DataFrame(columns=columns)

# turn one team's <tr> rows into a DataFrame of player stat lines
# (index is the game id from the outer loop below)
def get_players(players, team_name):
    array = np.zeros((len(players), len(headers) + 1), dtype=object)
    array[:] = np.nan
    for i, player in enumerate(players):
        cols = player.find_all('td')
        array[i, 0] = cols[0].text.split(',')[0]
        for j in range(1, len(headers) + 1):
            if not cols[1].text.startswith('DNP'):
                array[i, j] = cols[j].text
    frame = pd.DataFrame(columns=columns)
    for x in array:
        line = np.concatenate(([index, team_name], x)).reshape(1, len(columns))
        new = pd.DataFrame(line, columns=frame.columns)
        frame = frame.append(new)
    return frame

# scrape the box score for every game id
for index, row in games.iterrows():
    print(index)
    request = requests.get(BASE_URL.format(index))
    table = BeautifulSoup(request.text).find('table', class_='mod-data')
    if table == []:   # meant to catch bad downloads
        print index, 'bad'
        bad_downloads.append(index)
    else:
        heads = table.find_all('thead')   # ** this is where the error happens
        bodies = table.find_all('tbody')
        team_1 = heads[0].th.text
        team_1_players = bodies[0].find_all('tr') + bodies[1].find_all('tr')
        team_1_players = get_players(team_1_players, team_1)
        players = players.append(team_1_players)
        team_2 = heads[3].th.text
        team_2_players = bodies[3].find_all('tr') + bodies[4].find_all('tr')
        team_2_players = get_players(team_2_players, team_2)
        players = players.append(team_2_players)

players = players.set_index('id')
print(players)
players.to_csv('players.csv')
print bad_downloads
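For now I am thinking about guarding against the missing table and retrying the download, roughly like the sketch below (reusing the names from my code above; the None check, the retry count of 3, and the 2-second sleep are just guesses on my part and not tested), in case the problem is flaky responses from ESPN rather than my parsing:

for index, row in games.iterrows():
    table = None
    for attempt in range(3):    # arbitrary retry count
        request = requests.get(BASE_URL.format(index))
        table = BeautifulSoup(request.text).find('table', class_='mod-data')
        if table is not None:   # find() returns None when the table is missing
            break
        time.sleep(2)           # back off before retrying
    if table is None:
        bad_downloads.append(index)
        continue
    # ... same parsing as above ...

But I would still like to know what is actually causing the intermittent failures.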