1

I've been practicing web scraping with the nba.com player list, but I've run into a problem where a link that I scraped in one for loop does not appear when I refer to it in another for loop.

I have already tried creating more variables, both in the original for loop and in the for loop where I want the value to show up, but the link still does not appear. I am trying to use the link that I scraped (it is not the full link, only the end part, which I'm trying to combine with the base of the URL).

from selenium import webdriver
from bs4 import BeautifulSoup
from selenium.webdriver.chrome.options import Options
class Player:
    """Holds the scraped values for one player.

    All four attributes (name, link, PPG, RPG) start out as empty strings
    and are filled in later by the scraping functions.
    """

    def __init__(self):
        # One chained assignment; targets are bound left to right.
        self.name = self.link = self.PPG = self.RPG = ""
# Load the NBA stats player index in headless Chrome and build a Player
# (anchor text + href) for every <a> found inside the player-list container.
def get_player_list():
    opt = webdriver.ChromeOptions()
    opt.add_argument('headless')
    browser = webdriver.Chrome(options=opt)
    browser.get('https://stats.nba.com/players/list/')
    # Parse a snapshot of the page source, then shut the browser down.
    soup = BeautifulSoup(browser.page_source, 'lxml')
    browser.quit()
    # Container <div> whose anchors are iterated below.
    names = soup.find('div',class_='stats-player-list players-list')
    player_list = []



# NOTE(review): the indentation from here down is kept exactly as pasted; the
# loop and the final return presumably belong inside get_player_list -- confirm.
for name in names.find_all('a'):
        new_play = Player()
        new_play.name = name.text
        # The href is stored as found; it is later appended to the site base URL.
        new_play.link = name["href"]
        player_list.append(new_play)

    # Debug output: print each collected name and link.
    for one_player in player_list:
        print (one_player.name)
        print (one_player.link)



return player_list
def get_player_stats(player_list):
    """Open the pages of the first two players and store scraped stat text.

    For each of the first two entries of ``player_list`` the page at
    ``'https://stats.nba.com' + p.link`` is loaded in headless Chrome, and
    ``p.PPG`` / ``p.RPG`` are set from the elements that follow the ``<div>``
    inside the PTS and REB sort links. The same list is returned.
    """
    opt = webdriver.ChromeOptions()
    opt.add_argument('headless')
    browser = webdriver.Chrome(options=opt)
    # Only the first two players are processed.
    for p in player_list[0:2]:

        browser.get('https://stats.nba.com'+p.link)

        soup = BeautifulSoup(browser.page_source, 'lxml')

        # NOTE(review): this quit() runs on the first pass through the loop, so
        # the second browser.get() would be issued against a driver that has
        # already been shut down -- confirm whether only the quit() after the
        # loop was intended.
        browser.quit()
        PPG = ""
        points1 = soup.find('a',href = '/players/traditional/?sort=PTS&dir=-1')
        points = points1.div
        # Each sibling overwrites PPG, so the last sibling's text is what remains.
        for point in points.findNextSiblings():
            PPG = "PPG" + point.text
        RPG = ""
        rebounds1 = soup.find('a',href = '/players/traditional/?sort=REB&dir=-1')
        rebounds = rebounds1.div
        # Same pattern as above: the last sibling's text wins.
        for rebound in rebounds.findNextSiblings():
            RPG = "RPG" + rebound.text

        p.PPG = PPG
        p.RPG = RPG

    browser.quit()
    return player_list
# Run the scrape: build the player list, then attach stats to its first two entries.
player_list = get_player_stats(get_player_list())

As shown in the offset code starting with names.find_all('a'):, everything works properly there: the link gets transferred and prints out following the template (e.g. Abrines, Alex /player/203518/). But when execution gets to for p in player_list[0:2]:, p.link does not get transferred over, and when I tried to print p.link, nothing was printed. Any help would be appreciated, as I've been testing out so many things for a while now!

bpat123
  • 11
  • 1

2 Answers2

0

The player_list variable is not a global variable. You define it inside a single function. To keep track of it in all functions, you should initialize it globally.

0

This is because the first href is null. In that case you need to add a condition before adding the link to the list. I have added that step; now check.

from selenium import webdriver
from bs4 import BeautifulSoup
from selenium.webdriver.chrome.options import Options
class Player:
    """Container for one scraped player.

    Attributes name, link, PPG and RPG are all initialised to the empty
    string; the scraping functions overwrite them afterwards.
    """

    def __init__(self):
        # Every field starts out blank.
        for field in ("name", "link", "PPG", "RPG"):
            setattr(self, field, "")

def get_player_list():
    """Scrape the NBA stats player index and return a list of Player objects.

    The index page is loaded in headless Chrome and parsed with BeautifulSoup.
    One Player is created per ``<a>`` inside the player-list container, with
    ``name`` set to the anchor text and ``link`` set to its ``href``.

    Anchors with a missing or empty ``href`` are skipped, because the link is
    later concatenated onto the site base URL.

    Returns:
        list of Player; empty if the player-list container is not on the page.
    """
    opt = webdriver.ChromeOptions()
    opt.add_argument('headless')
    browser = webdriver.Chrome(options=opt)
    try:
        browser.get('https://stats.nba.com/players/list/')
        soup = BeautifulSoup(browser.page_source, 'lxml')
    finally:
        # Always shut Chrome down, even if loading or parsing raised.
        browser.quit()

    player_list = []
    names = soup.find('div', class_='stats-player-list players-list')
    if names is None:
        # Container not found: nothing to collect.
        return player_list

    for name in names.find_all('a'):
        # .get() returns None for an anchor without an href attribute, where
        # name["href"] would raise KeyError.
        href = name.get('href')
        if not href:
            continue
        new_play = Player()
        new_play.name = name.text
        new_play.link = href
        player_list.append(new_play)

    return player_list
def _read_stat(soup, sort_href, label):
    """Return ``label`` + text of the last element following the link's <div>.

    Looks up the ``<a>`` whose href equals ``sort_href`` and takes the last
    sibling after its first ``<div>``. Returns "" if the link, its ``<div>``,
    or any following sibling is missing.
    """
    anchor = soup.find('a', href=sort_href)
    if anchor is None or anchor.div is None:
        return ""
    siblings = anchor.div.find_next_siblings()
    if not siblings:
        return ""
    return label + siblings[-1].text


def get_player_stats(player_list):
    """Fill in PPG and RPG for the first two players in ``player_list``.

    Each player's page (``'https://stats.nba.com' + p.link``) is loaded in a
    single headless Chrome session that stays open for the whole loop and is
    closed once at the end.

    Args:
        player_list: list of Player objects with ``link`` already set.

    Returns:
        The same list; its first two entries have ``PPG`` and ``RPG`` set
        (to "" when the value cannot be found on the page).
    """
    opt = webdriver.ChromeOptions()
    opt.add_argument('headless')
    browser = webdriver.Chrome(options=opt)
    try:
        for p in player_list[0:2]:
            url = 'https://stats.nba.com' + p.link
            print(url)
            browser.get(url)
            soup = BeautifulSoup(browser.page_source, 'lxml')

            p.PPG = _read_stat(soup, '/players/traditional/?sort=PTS&dir=-1', 'PPG')
            p.RPG = _read_stat(soup, '/players/traditional/?sort=REB&dir=-1', 'RPG')
    finally:
        # Quit once, after every page has been fetched. Quitting inside the
        # loop would leave the next browser.get() without a live session.
        browser.quit()
    return player_list

# Run the scrape: build the player list, then attach stats to its first two entries.
player_list = get_player_stats(get_player_list())
KunduK
  • 32,888
  • 5
  • 17
  • 41