At work I was tasked with doing a market analysis of bricks. I chose some competitors and made web scrapers to collect their prices. It works for most brick types, however on some it will change the value or say there's isn't a match when there is.
The issues are only with the Prices_Building. The rest of the code works well, weirdly, if I use the Prices_Building code to search for just one name, it'll get it right.
Here is an image of Output spreadsheet
the ones in green are correct values on the website, the ones in red are incorrect with the correct value in {} if it exists.
Here is my code:
sheet = client.open("Bricks Compare Prices").get_worksheet(0)
Prices_Amari = []
Prices_Wholesale = []
Prices_Building = []
Names = []
Prices_Amari = []
#List of bricks to compare
lis = [ (list of names boiled down to NAME pack of SIZE
]
Prices_Building = []
Namez = []
for name in lis: # for every name in the list
target = name.rpartition("Pack")[0] #get the essential name
pack_size = re.search(pattern = '[0-9]+', string=name).group() #get the pack size
res = requests.get("https://eucs13.ksearchnet.com/cloud-search/n-search/search?ticket=klevu-15598202362809967&term={}&paginationStartsFrom=0&sortPrice=false&ipAddress=undefined&analyticsApiKey=klevu-15598202362809967&showOutOfStockProducts=true&klevuFetchPopularTerms=false&klevu_priceInterval=500&fetchMinMaxPrice=true&klevu_multiSelectFilters=true&noOfResults=1&klevuSort=rel&enableFilters=true&layoutVersion=1.0&autoComplete=false&autoCompleteFilters=&filterResults=&visibility=search&category=KLEVU_PRODUCT&klevu_filterLimit=50&sv=2316&lsqt=&responseType=json&klevu_loginCustomerGroup=".format(name))
results = json.loads(res.text)['result'] #go to this site, search for the brick
for i in results: #for every result, check that the name and pack size is in the title, or sau there's no match
if target in i['name'] and pack_size in i['name']:
Prices_Building.append(i['salePrice'])
Namez.append(i['name'])
else:
Prices_Building.append("No match in Building Supplies Online" + name)
Namez.append(i['name'])
#repeat for other website for Name in lis:
def get_url_Amaari(search_term):
build = 'https://ammaaristones.co.uk/?s={}&post_type=product'
url = build.format(search_term)
return url
headers = {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/50.0.2661.102 Safari/537.36'}
result_Ammaristones = requests.get(get_url_Amaari(Name), headers=headers)
try:
soupAmm = BeautifulSoup(result_Ammaristones.text, 'lxml')
Par = soupAmm.find('div', class_='box-text box-text-products')
PriceAmm = re.findall("[-+]?[.]?[\d]+(?:,\d\d\d)*[\.]?\d*(?:[eE][-+]?\d+)?",Par.find('bdi').text)[0]
Prices_Amari.append(PriceAmm)
except:
PriceAmm = "no match in Ammari Stones for:" + Name
Prices_Amari.append(PriceAmm)
pass
#repeat for other website for Name in lis:
try:
headers = {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/50.0.2661.102 Safari/537.36'}
def get_url_Wholesale(search_term):
build = 'https://brickwholesale.co.uk/?s={}&post_type=product&dgwt_wcas=1'
url = build.format(search_term)
return url
result_Wholesale = requests.get(get_url_Wholesale(Name), headers=headers)
soupWhole = BeautifulSoup(result_Wholesale.text, 'html.parser')
Pparent = soupWhole.find_all('span', class_='woocommerce-Price-currencySymbol')
Whole = (float(re.findall("[-+]?[.]?[\d]+(?:,\d\d\d)*[\.]?\d*(?:[eE][-+]?\d+)?",soupWhole.find('bdi').text.strip())[0]))*1.2+96
PriceWhole = math.floor(Whole)
if PriceWhole == 96:
PriceWhole = "No Match in Wholesale Bricks for: " + Name
Prices_Wholesale.append(PriceWhole)
except:
PriceWhole = "no match in wholesale Bricks Stones for:" + Name
#print to google sheet one row at a time, matching up the prices for comparison
for j in range(len(lis)):
time.sleep(1)
row =[lis[j],Prices_Amari[j], Prices_Building[j], Prices_Wholesale[j]]
sheet.append_row(row)