I'm currently working on a code that scrapes a website for football match data, and I want it to put the data tables from the last 5 seasons into one excel workbook but on different sheets for each season.
The code below works fine until the point where it actually has to create the excel workbooks. Currently, it creates 5 different workbooks, one for each id, but only the ids 1631 and 1889 contain the correct data. The other three workbooks contain the data from id=1889.
I already looked up several solutions, but couldn't find one that matches my problem, so I'm not entirely sure if it even can be done. Thank you in advance!
import requests
from bs4 import BeautifulSoup
import pandas as pd
import xlsxwriter
def get_df(team_name, team_id):
seasons_url = (
f"https://fbref.com/de/mannschaften/{team_id}/2021-2022/{team_name}-Statistiken",
f"https://fbref.com/de/mannschaften/{team_id}/2020-2021/{team_name}-Statistiken",
f"https://fbref.com/de/mannschaften/{team_id}/2019-2020/{team_name}-Statistiken",
f"https://fbref.com/de/mannschaften/{team_id}/2018-2019/{team_name}-Statistiken",
f"https://fbref.com/de/mannschaften/{team_id}/2017-2018/{team_name}-Statistiken",)
ids = ["stats_standard_11160",
"stats_standard_10728",
"stats_standard_3232",
"stats_standard_1889",
"stats_standard_1631"]
heads = {'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 '
'(KHTML, like Gecko) Chrome/70.0.3538.110 Safari/537.36'}
for season in seasons_url:
url = season
response = requests.get(url, headers=heads)
html = response.text
soup = BeautifulSoup(html, "html.parser")
for id in ids:
tables = (soup.find(id=id))
if tables is not None:
table = tables
for table_per_season in table:
df = pd.read_html(str(table))[0]
writer = pd.ExcelWriter(f"{team_name}{id}.xlsx", engine="xlsxwriter")
df.to_excel(writer, sheet_name=f"{id}", index=True)
writer.save()