I have two URLs of the same page that I want to scrape to get room prices in NYC. I used BeautifulSoup to get the address, price, and availability of each room. After that, I make a dictionary so that I can create a DataFrame.
I am getting a separate DataFrame for each URL, but I want the information from both URLs combined into one single DataFrame.
After I get the information that I need, I append it to lists that I later use to build a dictionary.
def getRoomInfo(startingPage):
    """Scrape room listings reachable from *startingPage* and return a DataFrame.

    Fetches the listing index page, collects every roommate-share detail link,
    then visits each detail page to extract address, price, and availability.

    Parameters
    ----------
    startingPage : str
        URL of a listings index page (e.g. a nyhabitat.com search-results page).

    Returns
    -------
    pandas.DataFrame
        Columns: "Address", "Price", "Availability" — one row per listing.
        Returning the frame (instead of only printing it) lets callers combine
        several pages with ``pd.concat([getRoomInfo(u) for u in urls])``.
    """
    html = requests.get(startingPage)
    bs1 = BeautifulSoup(html.text, "html.parser")

    # Scheme + host of the starting page, used to absolutize relative hrefs.
    parsed = urlparse(startingPage)
    base_url = "{}://{}".format(parsed.scheme, parsed.netloc)

    # Detail-page links look like /new-york-apartment/roommate-share/<digits>.
    link_pattern = re.compile(r"/new-york-apartment/roommate-share/\d+")
    room_links = [
        base_url + link["href"]
        for link in bs1.find_all("a", href=link_pattern)
    ]

    addressList = []
    priceList = []
    availabilityList = []
    for page in room_links:
        html_page = requests.get(page)
        bs_page = BeautifulSoup(html_page.text, "html.parser")
        address = bs_page.find("div", {"class": "ap-info-address"})
        price = bs_page.find("div", {"class": "apt-price price-cur-1"})
        # NOTE(review): first <td> on the page is assumed to be the
        # availability cell — confirm against the site's markup.
        availability = bs_page.find("td")
        if address is None or price is None or availability is None:
            # Page layout differs (or listing was removed): skip rather than
            # crash with AttributeError on None.get_text().
            continue
        addressList.append(address.get_text())
        priceList.append(price.get_text())
        availabilityList.append(availability.get_text())

    infoDataFrame = pd.DataFrame(
        {"Address": addressList,
         "Price": priceList,
         "Availability": availabilityList,
         })
    print(infoDataFrame)
    return infoDataFrame
# Index pages to scrape: the first results page plus page 2 of the same
# filtered search (URLs must stay exactly as issued by the site).
links_rooms = (
    "https://www.nyhabitat.com/new-york-apartment/roommate-share",
    "https://www.nyhabitat.com/new-york-apartment/list.php?page=2&dep=SH&lev=3&price=400;2400&guest=1&sort=new&cll=1&searchMapLeft=40.60484725779594&searchMapTop=-73.81336257537379&searchMapRight=40.90185344223534&searchMapBottom=-74.14810226043238&searchMapZoom=11&div_code=ny&lang=en",
)