I am looking to use a scraper to return Yelp reviews for motels within a town. What I need to be able to do is search the reviews for key words, such as "mold", and be provided the motel back, along with the review itself. I have some code (I am using Jupyterhub), but it seems to only be giving me back the motel names.
import json
from bs4 import BeautifulSoup
api_key = '#insert key here'
headers = {'Authorization': 'Bearer %s' % api_key}
url = 'https://api.yelp.com/v3/businesses/search'
params = {'term':'motel','location':'Williamsburg, VA'}
req = requests.get(url, params=params, headers=headers)
parsed = json.loads(req.text)
businesses = parsed["businesses"]
business_url_list = [business["url"] for business in businesses]
print(businesses)
biznames=[]
for val in businesses:
biznames.append(val['name'])
print(biznames)
review_list_master = []
for i in business_url_list:
continue_search = True
reviews_list = []
while continue_search == True:
html_doc = requests.get(i).content
parsed_html = BeautifulSoup(html_doc, 'lxml')
target_rows_url = parsed_html.findAll('div',attrs={'class','review-content'})
for x in target_rows_url:
new_text = x.text.strip().replace('\n','')
date_break_point = new_text.find(' ')
reviews_list.append(new_text[date_break_point+4:len(new_text)])
try:
target_rows_new = parsed_html.find('a',attrs={'class','u-decoration-none next pagination-links_anchor'})
new_url = target_rows_new.get('href')
i = new_url
except:
continue_search = False
review_list_master.append(reviews_list)
print(target_rows_url)
print(parsed_html)
for i in range(len(review_list_master)):
print(biznames[i])
#print(len(review_list_master[i]))
for x in review_list_master[i]:
print(x)
print()
print("------")
Any suggestions would be greatly appreciated. I'm very much so a novice at coding, and I've tried using so many different scrapers that I cannot seem to make work.