I'm trying to scrape a website for multiple values for each book in a list of books. The links to the book pages are stored in a dataframe. Now I need a function that iterates over those links and adds the scraped book values to new columns in the dataframe. I don't want to request the page again for every value I scrape, so I want to do it all in one function. The problem is that the function then returns multiple values (e.g. book_title and book_rating), and I don't know how best to add them to the dataframe.
I tried the following, which I know can't work but I'm stuck:
import requests as rq
from bs4 import BeautifulSoup as bs
import pandas as pd
#function to get the book page
def get_book_page(page, timeout=30):
    """Fetch a book page and return it as a parsed BeautifulSoup document.

    Args:
        page: URL of the book page to request.
        timeout: Seconds to wait for the server before giving up; passed
            straight through to the HTTP request.

    Returns:
        The response body parsed with the ``html.parser`` backend.

    Raises:
        Exception: If the server answers with a status code other than 200.
            The HTTP library may additionally raise its own exceptions,
            e.g. when the timeout expires.
    """
    # NOTE(review): `headers` is not defined in this snippet; it is assumed
    # to be a module-level mapping of request headers -- confirm.
    # Without a timeout a stalled server would block the whole scrape.
    response = rq.get(page, headers=headers, timeout=timeout)

    # Anything other than 200 is treated as a failed fetch.
    if response.status_code != 200:
        print('Status code:', response.status_code)
        raise Exception('Failed to fetch web page ' + page)

    return bs(response.content, "html.parser")
#function to scrape the book title
def scrape_book_title(book_content):
    """Return the book title found in *book_content*, or ``"fehlt"``.

    Args:
        book_content: Parsed HTML fragment exposing a ``find`` method;
            may be ``None``.

    Returns:
        The stripped text of the title ``<h1>``, or the placeholder
        ``"fehlt"`` when the fragment or the heading is absent.
    """
    try:
        heading = book_content.find(
            "h1", class_="bc-heading bc-color-base bc-size-large bc-text-bold"
        )
        return heading.text.strip()
    except AttributeError:
        # Either book_content itself or the heading lookup was None.
        # Only this case is swallowed, so Ctrl-C and real bugs still surface.
        return "fehlt"
#function to scrape the book rating
def scrape_book_rating(book_content):
    """Return the book rating text found in *book_content*, or ``"fehlt"``.

    Args:
        book_content: Parsed HTML fragment exposing a ``find`` method;
            may be ``None``.

    Returns:
        The stripped text of the off-screen rating ``<span>`` inside the
        ``ratingsLabel`` list item, or the placeholder ``"fehlt"`` when the
        fragment, the list item, or the span is absent.
    """
    try:
        # The outer lookup is inside the try as well, so a missing fragment
        # (book_content is None) yields the placeholder instead of crashing,
        # matching scrape_book_title's behaviour.
        rating_item = book_content.find("li", class_="bc-list-item ratingsLabel")
        rating_span = rating_item.find("span", class_="bc-text bc-pub-offscreen")
        return rating_span.text.strip()
    except AttributeError:
        # One of the lookups above returned None.
        return "fehlt"
#function I'm trying to fix
def get_book_title(links):
    """Fetch one book page and scrape its title and rating in a single pass.

    Args:
        links: URL of the book page (a single link, despite the plural name).

    Returns:
        A ``(book_title, book_rating)`` tuple.
    """
    soup = get_book_page(links)
    # Both values are read from the same details list, so the page is
    # requested only once per book.
    details = soup.find(
        "ul", class_="bc-list bc-spacing-s2 bc-color-secondary bc-list-nostyle"
    )
    return scrape_book_title(details), scrape_book_rating(details)
# Add the columns "A_Titel" and "A_Rating" from the (book_title, book_rating)
# pair that get_book_title returns for each link.
# Unpacking the Series returned by df.apply(..., axis=1) iterates over its
# rows, not over the two values in each tuple, so it cannot fill two columns.
# Instead, scrape every page exactly once and then split the pairs per column.
scraped = [get_book_title(link) for link in df['Link']]
df['A_Titel'] = [title for title, _ in scraped]
df['A_Rating'] = [rating for _, rating in scraped]