This is my first project with pandas and Selenium, so I may be making a dumb mistake. I've written this function to go through a list of NBA players and scrape each player's game log into a DataFrame. It all works well, but occasionally, at some random point while going through the list of players, it just stops working and gives me this error:
Traceback (most recent call last):
File "/Users/arslanamir/PycharmProjects/nba/main.py", line 154, in <module>
Game_Log_Scraper(players, x)
File "/Users/arslanamir/PycharmProjects/nba/main.py", line 48, in Game_Log_Scraper
tables = pd.read_html(html, flavor='lxml')
File "/Users/arslanamir/PycharmProjects/nba/venv/lib/python3.9/site-packages/pandas/util/_decorators.py", line 299, in wrapper
return func(*args, **kwargs)
File "/Users/arslanamir/PycharmProjects/nba/venv/lib/python3.9/site-packages/pandas/io/html.py", line 1085, in read_html
return _parse(
File "/Users/arslanamir/PycharmProjects/nba/venv/lib/python3.9/site-packages/pandas/io/html.py", line 913, in _parse
raise retained
File "/Users/arslanamir/PycharmProjects/nba/venv/lib/python3.9/site-packages/pandas/io/html.py", line 893, in _parse
tables = p.parse_tables()
File "/Users/arslanamir/PycharmProjects/nba/venv/lib/python3.9/site-packages/pandas/io/html.py", line 213, in parse_tables
tables = self._parse_tables(self._build_doc(), self.match, self.attrs)
File "/Users/arslanamir/PycharmProjects/nba/venv/lib/python3.9/site-packages/pandas/io/html.py", line 684, in _parse_tables
raise ValueError(f"No tables found matching regex {repr(pattern)}")
ValueError: No tables found matching regex '.+'
Process finished with exit code 1
This is the function:
import pandas as pd
from pathlib import Path
from selenium import webdriver


def Game_Log_Scraper(players):
    for name in players:
        first = name.split()[0]
        last = name.split()[1]
        # Skip players whose game log has already been saved to a file
        if not Path(f'/Users/arslanamir/PycharmProjects/nba/{first} {last}').is_file():
            driver = webdriver.Chrome(executable_path='/Users/arslanamir/PycharmProjects/chromedriver')
            driver.get(f'https://www.nba.com/stats/players/boxscores/?CF=PLAYER_NAME*E*{first}%20{last}&Season=2020-21'
                       f'&SeasonType=Regular%20Season')
            html = driver.page_source
            tables = pd.read_html(html, flavor='lxml')  # line 48 in the traceback
            data = tables[1]
            driver.close()
            # Drop the columns I don't need, then any rows with missing values
            not_needed = ['Match\xa0Up', 'Season', 'FGM', 'FGA', '3PM', '3PA', '3P%', 'FTM', 'FTA',
                          'FT%', 'STL', 'BLK', 'TOV', '+/-', 'FP', 'FG%', 'OREB', 'DREB', 'PF']
            for item in not_needed:
                data.drop(item, axis=1, inplace=True)
            data.dropna(axis=0, inplace=True)
            data.drop('W/L', axis=1, inplace=True)
            with open(f'{first} {last}', 'w+') as f:
                f.write(data.to_string())
    return players
I've tried changing the read_html flavor to html5lib and bs4 as well, and neither works. Here is an example of the webpage: https://www.nba.com/stats/players/boxscores/?CF=PLAYER_NAME*E*Malik%20Beasley&Season=2020-21&SeasonType=Regular%20Season
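My current guess is that the stats page loads the table with JavaScript, so sometimes the table hasn't rendered yet when I grab page_source, which would explain why read_html intermittently finds no tables. I'm considering adding an explicit wait before reading the page, something like the untested sketch below (it would replace the driver.get / page_source lines inside the function; the 'table' CSS selector and the 20-second timeout are just guesses on my part):

from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

driver.get(f'https://www.nba.com/stats/players/boxscores/?CF=PLAYER_NAME*E*{first}%20{last}&Season=2020-21'
           f'&SeasonType=Regular%20Season')
# Wait up to 20 seconds for at least one table element to appear before grabbing the page source.
# The selector is a guess; it may need to be narrowed to the boxscore table specifically.
WebDriverWait(driver, 20).until(
    EC.presence_of_element_located((By.CSS_SELECTOR, 'table'))
)
html = driver.page_source
tables = pd.read_html(html, flavor='lxml')

Is something like this the right way to handle it, or is there a better fix?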