EDIT: Yes — when I print the links in the second for loop, the correct links appear in the Python console.
I am trying to scrape links from a search engine and then follow those links to scrape additional data; however, my code will not follow the scraped links and returns "not found." What am I doing wrong?
import urllib.request
import bs4 as bs
#iterate the pages in the search result
# Iterate over the paginated search results, 100 results per page.
# NOTE(review): `count` is not defined anywhere in this snippet -- it must
# be set (e.g. the total number of results) before this loop runs.
for page in range(0, count, 100):
    # BUG FIX: urlopen() requires an absolute URL *including the scheme*.
    # A bare 'www.mywebsite.com' is rejected as an unknown URL type, and the
    # same problem on the scraped links is why the follow-up requests came
    # back "not found".
    # NOTE(review): `page` is never used in the request URL, so every
    # iteration fetches the same page; the offset should probably be passed
    # as a query parameter (e.g. '?s=%d' % page) -- confirm against the
    # site's actual pagination scheme.
    source = urllib.request.urlopen('http://www.mywebsite.com').read()
    soup = bs.BeautifulSoup(source, 'lxml')

    # Data to be captured from the search-result listing.
    # Prefix the (relative) hrefs with the scheme + domain so that each
    # entry is an absolute URL that urlopen() can fetch.
    url = ['http://www.mywebsite.com' + a.get('href')
           for a in soup.find_all('a', {'class': ['result-title hdrlnk']})]
    postdate = [pd.get('title')
                for pd in soup.find_all('time', {'class': ['result-date']})]
    price = [span.string
             for span in soup.find_all('span', {'class': ['result-price']})]
    # get_text(strip=True) already trims whitespace; the extra .strip()
    # in the original was redundant and has been dropped.
    bedroom = [span.get_text(strip=True)
               for span in soup.find_all('span', {'class': ['housing']})]

    # Follow each scraped link to fetch the detail page for more data.
    for link in url:
        print(link)  # now an absolute URL with scheme, so urlopen works
        source2 = urllib.request.urlopen(link).read()
        soup2 = bs.BeautifulSoup(source2, 'lxml')