I'm collecting data from several repositories (around 10k) from my organization for a AI search engine. I have written a python script (using PyGithub lib) to fetch the data, including a rate limit access that checks if the run time is close to 60 minutes (the expiration time for a Github APP installation token), puts the script to sleep so it can generate a new token and proceed.
This is the script:
def main():
token = get_token()
g = Github(token)
org = 'XYZ'
count = 0
data = []
start_time = time.time()
rate = g.get_rate_limit()
print(f"Initiating at: \n\
Reset time: {rate.raw_data['core']['reset']}\n\
Remaining requests: {rate.raw_data['core']['remaining']}")
for repo in g.get_organization(org).get_repos():
# Check the rate limit and regenerate token if needed
g = check_rate_limit(rate,start_time)
# If the rate was regenerated, update the rate object
if g:
rate = g.get_rate_limit()
# Collect data for the repo
collect_data_for_repo(data,repo)
count += 1
print(rate.raw_data['core']['remaining'])
print(f'{repo.full_name}, {count}\n--------------------------------')
print(f"{g.get_rate_limit()}")
columns = ['Repository Name', 'Link', 'Readme Content', 'Comments', 'Languages', 'Topics']
df = pd.DataFrame(data, columns=columns)
rate_end_printing = g.get_rate_limit()
print(f"Rate limit remaining: {rate_end_printing.raw_data['core']['remaining']}")
#print(rate.raw_data['core']['remaining'])
#print(f'{repo.full_name}, {count}')
return df
if __name__ == "__main__":
main()
and this is the check_rate_limit func:
def check_rate_limit(rate,start_time):
if time.time() > rate.raw_data['core']['reset'] - 60:
sleep_time = 120
print(f'Putting to sleep for {sleep_time} seconds (runtine over 60 minutes)')
time.sleep(sleep_time)
token = get_token()
print(f'Token regenerated at {datetime.datetime.now()}')
return Github(token)
It collects data, reaches the time limit, generates a new token and when it continues I still get this error:
BadCredentialsException: 401 {"message": "Bad credentials", "documentation_url": "https://docs.github.com/rest"}
Any ideas on dealing with this time expiration limit when dealing with Github APP tokens?