0

Background: I'm attempting to create a DataFrame from data returned by Twitch's API. The API only allows 100 records per call, so each response includes a new pagination cursor that is used to request the next page. I'm using the following code to try to pull this data efficiently rather than manually adjusting the `after=<pagination value>` parameter in the GET request. Right now the variable I'm trying to make dynamic is `Pagination`, but it only gets updated once the loop finishes - not helpful! Take a look below and see if you notice anything I can change to achieve this goal. Any help is appreciated!

# Collect the top Twitch games page by page (the endpoint returns at most
# 100 records per call) into a list holding one DataFrame per page.
TwitchTopGamesDataFrame = []  # This is our data list: one DataFrame per page
BaseURL = 'https://api.twitch.tv/helix/games/top?first=100'
# NOTE(review): credentials are hard-coded here; they should be rotated and
# loaded from the environment or a secrets store instead of living in source.
Headers = {'client-id':'lqctse0orgdbs5gdf5faz665api03r','Authorization': 'Bearer a1yl09mwmnwetp6ovocilheias8pzt'}
Indent = 2
Pagination = ''  # '&after=<cursor>' suffix; empty for the first page
iterations = 1  # Data records returned are equivalent to iterations x100
RecordsSoFar = 0  # Rows gathered on earlier pages, so Rank keeps counting up

# Display setting does not change between pages, so set it once.
pd.set_option('display.max_rows', None)

# Loop: request a page, convert the JSON, append to the data list, then read
# the pagination cursor for the next request - iterate until 300 records.
while iterations <= 3:
    # The request must be issued inside the loop; otherwise the updated
    # Pagination value is never sent and the same page is parsed every time.
    FullURL = BaseURL + Pagination
    Response = requests.get(FullURL, headers=Headers)
    Response.raise_for_status()  # Fail loudly on auth/rate-limit errors
    ResponseJSONData = Response.json()

    # Keep only the id/name columns; passing columns= also copes with an
    # empty page without raising KeyError.
    TopGamesDF = pd.DataFrame(ResponseJSONData['data'], columns=['id', 'name'])
    TopGamesDF = TopGamesDF.rename(columns={'id':'GameID','name':'GameName'})
    TopGamesDF['Rank'] = range(RecordsSoFar + 1, RecordsSoFar + len(TopGamesDF) + 1)
    RecordsSoFar += len(TopGamesDF)
    TwitchTopGamesDataFrame.append(TopGamesDF)

    # Read the cursor straight from the JSON; a missing cursor means there
    # are no further pages to fetch.
    Cursor = ResponseJSONData.get('pagination', {}).get('cursor')
    if not Cursor:
        break
    Pagination = '&after=' + Cursor
    iterations += 1
TwitchTopGamesDataFrame

1 Answer

0

Figured it out:

def top_games(page_count):
    """Return the top Twitch games as a single ranked DataFrame.

    The Twitch ``games/top`` endpoint returns at most 100 records per call,
    so this follows the pagination cursor from each response to request the
    next page until ``page_count`` pages have been collected or the API
    stops returning a cursor.

    Args:
        page_count: Number of pages (of up to 100 records each) to fetch.
            Zero or a negative value fetches nothing.

    Returns:
        A DataFrame with columns ``GameID``, ``GameName`` and ``Rank``,
        where ``Rank`` numbers the rows from 1 in the order received.
        The frame is empty when no pages were fetched.

    Raises:
        requests.HTTPError: If the API answers with an error status.
    """
    from time import gmtime, strftime

    print("Time of Execution:", strftime("%Y-%m-%d %H:%M:%S", gmtime()))

    base_url = 'https://api.twitch.tv/helix/games/top?first=100'  # Base URL
    # NOTE(review): credentials are hard-coded here; they should be rotated
    # and loaded from the environment or a secrets store instead.
    headers = {'client-id':'lqctse0orgdbs5gdf5faz665api03r','Authorization': 'Bearer a1yl09mwmnwetp6ovocilheias8pzt'}

    # Display setting is loop-invariant, so apply it once.
    pd.set_option('display.max_rows', None)

    records = []  # Raw game dicts accumulated across pages
    after_suffix = ''  # '&after=<cursor>' for every page after the first

    # Exactly one request per requested page.
    for _ in range(page_count):
        response = requests.get(base_url + after_suffix, headers=headers)
        response.raise_for_status()
        payload = response.json()
        records.extend(payload['data'])

        # Read the cursor directly from the JSON; when it is absent the
        # last page has been reached and there is nothing more to request.
        cursor = payload.get('pagination', {}).get('cursor')
        if not cursor:
            break
        after_suffix = '&after=' + cursor

    # Build the frame after the loop so it is always defined, even when no
    # pages were requested.
    final_frame = pd.DataFrame(records, columns=['id', 'name'])
    final_frame = final_frame.rename(columns={'id':'GameID','name':'GameName'})
    final_frame['Rank'] = range(1, len(final_frame) + 1)
    return final_frame