Im sorry if this is a brainf*rt question - its probably a simple error handling. This code breaks when one of the variables hits a blank (in this case in the 'num_views' variable) - Is there a way to return an 'NA' for any blank values? I would be so grateful for any advice
The error response is:
Error: All columns in a tibble must be vectors.
Column num_views
is a function.
# Custom functions
parse_listing <- function(listing){
# Review content
address <- listings %>% html_nodes(xpath = '//p[@data-testid="listing-description"]') %>% html_text2()
link <- listings %>% html_nodes(xpath = '//div[@data-testid="search-result"]/div/div/a[2]') %>% html_attr('href') %>% paste("https://www.zoopla.co.uk", ., sep="")
prop_type <- listings %>% html_nodes(xpath = '//h2[@data-testid="listing-title"]') %>% html_text2()
price <- listings %>% html_nodes(xpath = '//div[@data-testid="listing-price"]/p[@size="6"]') %>% html_text2() %>% str_remove_all("[£,]")
est_agent <- listings %>% html_nodes(xpath = '//div[@data-testid="search-result"]//a/img') %>% html_attr('alt') %>% str_remove('Marketed by ')
date_listed <- listings %>% html_nodes(xpath = '//span[@data-testid="date-published"]') %>% html_text2() %>% str_remove('Listed on ') %>% dmy()
num_views <- possibly(listings %>% html_nodes(xpath = '//span[@data-testid="number-of-views"]') %>% html_text2() %>% str_remove(' views'), otherwise = NULL)
tibble(address, link, prop_type, price, est_agent, date_listed, num_views)
}
# Script
link <- 'https://www.zoopla.co.uk/for-sale/property/wd3/?page_size=25&q=wd3&radius=0&results_sort=most_popular&search_source=refine&pn=7'
page <- read_html(link)
listings <- page %>% html_nodes(xpath = '//div[@data-testid="search-result"]')
wd3p7 <- map_dfr(listings, parse_listing)