I'm scraping the site Quicker.com but every time getting an error on random pages.
The error is:
UnexpectedAlertPresentException: Alert Text: C:\Users\HEYPIL~1\AppData\Local\Temp\Pkwnr4IA.php.part could not be saved, because the source file could not be read.
Try again later, or contact the server administrator.
<super: <class 'WebDriverException'>, <UnexpectedAlertPresentException object>>
My code:
from selenium import webdriver
import csv
import re
import hashlib
from selenium.common.exceptions import UnexpectedAlertPresentException
from selenium.common.exceptions import WebDriverException
import socket
import time
import datetime
ts = time.time()
st = datetime.datetime.fromtimestamp(ts).strftime('%Y-%m-%d %H:%M:%S')
date = st.encode('utf8')
IPAdd = socket.gethostbyname(socket.gethostname())
counter = 5
initial = []
base = "http://mumbai.quikr.com/Individual/0-50000000/Houses-Apartments-for-Sale/w1072?imageAds=Y&l=You_are-Price"
string = "&page="
while(counter == 5 or counter < 40):
base2 = base+string+str(counter)
if (counter < 39):
initial.append(base2)
elif(counter == 40):
initial.append(base)
else:
base2 = base
counter += 1
for n in initial:
result = []
driver = webdriver.Firefox()
driver.get(n)
main_page = '//a[@class="adttllnk unbold"]'
for a in driver.find_elements_by_xpath(main_page):
l = a.get_attribute('href')
result.append(l)
print len(result)
driver.close()
for url in result:
try:
driver = webdriver.Firefox()
driver.get(url)
items = []
desc_path = '//div[@id="ad_description"]'
img_path = '//div[@class="bigImg_wapp"]//img[@src]'
prop = '//div[@itemprop="name"]//h1[@class="ad_title translate"]'
amenities = '//div[@class="ad-atrbt-panel"]//span[@class="ad-atrbt-val"]//span[@class="attribVal newattribVal"]'
phone = '//span[@class="NoVerified-Text"]'
for x1 in driver.find_elements_by_xpath(img_path):
img = (x1.get_attribute('src')).encode('utf8')
print '\n'+img
for x2 in driver.find_elements_by_xpath(desc_path):
desc = (x2.text).encode('utf8')
print '\n'+ desc
for x3 in driver.find_elements_by_xpath(prop):
prop_title = (x3.text).encode('utf8')
print '\n'+prop_title
for x4 in driver.find_elements_by_xpath(amenities):
value = (x4.text).encode('utf8')
items.append(value)
print '\n'
print items
locality = items[0]
locality1 = locality.encode('utf8')
a = (locality1 if (isinstance(locality1,int) == False) else "")
bhk = items[1]
bhk1 = bhk.encode('utf8')
if(bhk1 == "4+ BHK"):
b = "4"
else:
bhk2 = [int(z) for z in bhk1.split() if z.isdigit()]
b = ((str(bhk2).strip('[')).strip(']')).strip()
furnish = items[2]
if(isinstance(furnish,int) == False ):
furnish1 = furnish.encode('utf8')
if((furnish1 == "Semi-Furnished") or (furnish1 == "Unfurnished") or (furnish1 == "Fully Furnished") or (furnish1 == "Unfurnished,Unf...")):
c = furnish1
else:
d = furnish1
elif(isinstance(furnish,int) == True):
furnish1 = furnish.encode('utf8')
d = furnish1
else:
c = ""
sqft = items[3]
if(isinstance(sqft,int)==True):
sqft1 = [int(xyz) for xyz in sqft.split() if xyz.isdigit()]
sqft2 = ((str(sqft1).strip('[')).strip(']')).strip()
d = sqft2.encode('utf8')
elif(isinstance(sqft,int)==False):
sqft1 = sqft.encode('utf8')
if((sqft1 == "Semi-Furnished") or (sqft1 == "Unfurnished") or (sqft1 == "Fully Furnished") or (sqft1 == "Unfurnished,Unf...")):
c = sqft1
else:
d = sqft1
else:
d = ""
atz = '\t'
print a,atz,b,atz,c,atz,d
for x5 in driver.find_elements_by_xpath(phone):
biz = (((x5.text).lstrip('+91')).strip()).encode('utf8')
if(len(biz)== 9):
biz_phone = '9'+biz
elif(len(biz) < 7 and len(biz) > 4):
biz_phone = '080'+biz
elif(len(biz) > 9 or len(biz) < 12):
biz_phone = biz
elif(len(biz) == 4 or len(biz) < 4):
biz_phone = biz.strip(biz)
else:
print '\nInvalid Business_phone'
print '\n'+biz_phone
driver.close()
hash_key = hashlib.md5("marketing@"+biz_phone+".com"+"Individual"+prop_title).hexdigest()
unique_key = ('I_'+hash_key).encode('utf8')
except (NameError, IndexError, WebDriverException, UnexpectedAlertPresentException) as e:
print "Failed to open: "+url
driver.close()
fieldname = ['Date','URL']
with open("C:\Users\Heypillow\Desktop\scrapWork\properties\\Failed_to_open_url.csv",'a') as h:
write = csv.DictWriter(h,fieldnames=fieldname,lineterminator = '\n')
write.writerow({'Date':date,
'URL':url})
I've blocked the pop-up in Firefox() but yet a pop-up is coming which addressing me to save a .php file and raises this exception.
I've already used that exception in the "except" part yet it's interrupting the code to work further and it's getting stopped just after this exception rises.
So, every time this exception rises, I have to restart the program. Thus I would like to download all the data by running the code through out the night,which is impossible with this circumstances…
How can I prevent this pop-up from opening?
(If I would have been able to upload a screenshot of the pop-up,it would have been easier to understand it.)