I've been practicing webscraping with the nba.com playerlist, but I've ran into a problem where a link that I scraped in one for loop does not appear when I call on it in another for loop.
I have already tried to make more variables in both the original for loop and the for loop I want the variable to show up in, but it does not appear. I am trying to use the link that I scraped (it is not the full link but rather the end part of the link which I'm trying to combine with the base of the link).
from selenium import webdriver
from bs4 import BeautifulSoup
from selenium.webdriver.chrome.options import Options
class Player():
"""docstring for ClassName"""
def __init__(self):
self.name = ""
self.link = ""
self.PPG = ""
self.RPG = ""
def get_player_list():
opt = webdriver.ChromeOptions()
opt.add_argument('headless')
browser = webdriver.Chrome(options=opt)
browser.get('https://stats.nba.com/players/list/')
soup = BeautifulSoup(browser.page_source, 'lxml')
browser.quit()
names = soup.find('div',class_='stats-player-list players-list')
player_list = []
for name in names.find_all('a'):
new_play = Player()
new_play.name = name.text
new_play.link = name["href"]
player_list.append(new_play)
for one_player in player_list:
print (one_player.name)
print (one_player.link)
return player_list
def get_player_stats(player_list):
opt = webdriver.ChromeOptions()
opt.add_argument('headless')
browser = webdriver.Chrome(options=opt)
for p in player_list[0:2]:
browser.get('https://stats.nba.com'+p.link)
soup = BeautifulSoup(browser.page_source, 'lxml')
browser.quit()
PPG = ""
points1 = soup.find('a',href = '/players/traditional/?sort=PTS&dir=-1')
points = points1.div
for point in points.findNextSiblings():
PPG = "PPG" + point.text
RPG = ""
rebounds1 = soup.find('a',href = '/players/traditional/?sort=REB&dir=-1')
rebounds = rebounds1.div
for rebound in rebounds.findNextSiblings():
RPG = "RPG" + rebound.text
p.PPG = PPG
p.RPG = RPG
browser.quit()
return player_list
player_list = get_player_stats(get_player_list())
As shown in the offset code starting with names.find_all('a'):
, everything works properly and the link gets transferred and prints out following the template(ex. Abrines, Alex /player/203518/), but when it gets to for p in player_list[0:2]:
, when I put in p.link, it doesn't get transferred over, and when I tried to make it print p.link, nothing got printed. Any help would be appreciated as I've been testing out so many things for a while now!