I've been working on a project where the goal is to take a two-column CSV of street addresses and zip codes, read it into R, then perform a Zillow query for each one (GetSearchResults, specifically), parse the output, and store the parsed output in a dataframe to be written to a CSV (and placed right next to the existing data).
Caveat: I can only query one address/zip combination at a time through the Zillow API, so any approach that violates that is off the table immediately.
At this point, I have about 85% of the work done. I have i) a bit of code that can query those address/zip combos from a dataframe one by one, as well as ii) a tentative way of putting the results back into a dataframe:
library(ZillowR)
library(rvest)
library(dplyr)
library(DT)
# this commented section is what I would use instead of creating the dataframe manually below, just for clarity
# data1 = read.csv('Addresses.csv', header = F, colClasses = 'character')$V1
# data2 = read.csv('Addresses.csv', header = F, colClasses = 'character')$V2
# data = data.frame(street = data1, city.state = as.character(data2))
# per comments, should add a "stringsAsFactors = FALSE" in the dataframe part
# Example input: one row per property, with the street address in `street`
# and the zip code in `city.state`.
# stringsAsFactors = FALSE keeps both columns as character vectors on every
# R version (before R 4.0 the default turned strings into factors).
data = data.frame(
  street = c('77 Huntington Ave',
             '85 Prospect St',
             '219 Lincoln St'),
  city.state = rep('01752', 3),
  stringsAsFactors = FALSE)
#' Query Zillow's GetDeepSearchResults service once per row of a data frame.
#'
#' Each row of `df` is sent to the API as a single address / zip lookup. A
#' lookup that raises an error or a warning is replaced by `NA`, so one bad
#' address does not abort the whole run. Progress is reported via `message()`.
#'
#' @param df Data frame with a `street` column (street address) and a
#'   `city.state` column (passed to the API as `citystatezip`).
#' @param address Unused; kept so existing calls keep working.
#' @param city.state Unused; kept so existing calls keep working.
#' @return `df` with the per-row lookup results bound on as extra columns.
#'   An empty `df` is returned unchanged. If the number of result rows does
#'   not match `nrow(df)`, an error string is printed and returned instead.
get.zillowdata <- function(df, address, city.state) {
  # Nothing to look up: return the empty input instead of letting 1:0
  # iterate over rows that do not exist.
  if (isTRUE(nrow(df) == 0L)) {
    return(df)
  }

  # require() only returns FALSE when the package is missing; fail loudly.
  if (!requireNamespace("ZillowR", quietly = TRUE)) {
    stop("Package 'ZillowR' is required but is not installed.", call. = FALSE)
  }
  ZillowR::set_zillow_web_service_id('API KEY')

  n_rows <- nrow(df)
  lookups <- lapply(seq_len(n_rows), function(i) {
    # as.character() guards against factor columns (the data.frame() default
    # before R 4.0), which must not reach the API as factor values.
    street <- as.character(df$street[i])
    zip <- as.character(df$city.state[i])
    tryCatch(
      ZillowR::GetDeepSearchResults(
        address = street,
        citystatezip = zip,
        zws_id = getOption("ZillowR-zws_id"),
        url = "http://www.zillow.com/webservice/GetDeepSearchResults.htm"
      ),
      error = function(cond) {
        message(paste("No Data Available:", street, zip))
        NA # placeholder row for a failed lookup
      },
      warning = function(cond) {
        message(paste("Zdata caused a warning:", street, zip))
        NA # placeholder row for a lookup that warned
      },
      # print processing message to screen
      finally = {
        message(paste("Processed Address:", street, zip))
        message(paste(i, "of", n_rows, 'processed'))
      }
    )
  })

  # Lookups that fell back to the NA placeholder. The raw API result has no
  # `amount` column, so counting NA there would always report zero.
  n_failed <- sum(vapply(lookups, function(x) identical(x, NA), logical(1)))

  results <- do.call(rbind, lookups)
  if (nrow(results) == nrow(df)) {
    results <- cbind(df, results)
    print(paste('Original data had', nrow(df), 'rows. Returning a dataframe with', nrow(results),
                'rows. Returned dataframe has', n_failed, 'missing zdata values.'))
    return(results)
  }
  print("Error: nrows(df) do not match nrows(zdata)")
}
# Run the lookup over the example `data` frame. The `address` and `city.state`
# arguments are left out, and the result is not assigned to a variable.
get.zillowdata(data)
and also iii) a parser for the XMLNode response that you get when you perform a query through the Zillow API, which picks out specific child values (zestimate, square footage, lot size, etc. — whatever you specify):
library(ZillowR)
library(XML)
library(RCurl)
# Store the Zillow Web Service ID in the "ZillowR-zws_id" option read below.
set_zillow_web_service_id('API KEY')

# Single example query.
# NOTE(review): the function called is GetDeepSearchResults, but `url` points
# at the GetSearchResults endpoint -- confirm which of the two is intended.
output123 <- GetDeepSearchResults(
  address = 'STREET ADDRESS',
  citystatezip = '0ZIP CODE',
  zws_id = getOption("ZillowR-zws_id"),
  url = "http://www.zillow.com/webservice/GetSearchResults.htm"
)

# Convert the "results" node of the XML response into nested R lists.
results <- xmlToList(output123$response[["results"]])
#' Pull the "low" or "high" figure out of a parsed valuation range.
#'
#' @param x Parsed valuation range: a matrix-like object with a "text" row and
#'   one column per bound (e.g. "low", "high"). May be NULL or lack dimensions.
#' @param hilo Single column name to extract, e.g. "low" or "high".
#' @return The first element of the "text" cell in column `hilo`, or `NA` when
#'   `x` has no dimensions, no "text" row, or no `hilo` column.
getValRange <- function(x, hilo) {
  # Check row and column names separately: a pooled lookup over all dimnames
  # would accept hilo = "text" (a row name) or miss an absent "text" row, and
  # the indexing below would then fail with "subscript out of bounds".
  has_cell <- !is.null(dim(x)) &&
    "text" %in% rownames(x) &&
    hilo %in% colnames(x)
  if (!has_cell) {
    return(NA)
  }
  x["text", hilo][[1]]
}
# Walk over the columns of `results` and reduce each one to the fields of
# interest: the zpid, the flattened links and address, and a summary of the
# zestimate (amount, last update, value change, valuation range, percentile).
out <- apply(results, MARGIN = 2, FUN = function(prop) {
  zest <- prop$zestimate

  # Each zestimate field falls back to NA when the expected piece is absent.
  amount_val <- ifelse("text" %in% names(zest$amount), zest$amount$text, NA)
  change_val <- ifelse(length(zest$valueChange) == 0, NA, zest$valueChange)
  low_val <- getValRange(zest$valuationRange, "low")
  high_val <- getValRange(zest$valuationRange, "high")

  zest_summary <- list(
    amount = amount_val,
    lastupdated = zest$`last-updated`,
    valueChange = change_val,
    valueLow = low_val,
    valueHigh = high_val,
    percentile = zest$percentile
  )

  list(
    id = prop$zpid,
    unlist(prop$links),
    unlist(prop$address),
    zest_summary
  )
})
# Flatten every entry of `out` into a single vector, stack the vectors as rows
# and convert the result to a data frame with rows numbered 1..length(out).
# NOTE: this reassigns `data`, replacing the address data frame of the same
# name created earlier in the file.
data <- as.data.frame(
  do.call(rbind, lapply(out, unlist)),
  row.names = seq_along(out)
)
But I'm a little stuck at this point. How should I put these together so that I can include the parsing at the end of the api call part and make sure that both of them get iterated over the full list of addresses/zips? My code right now isn't in any particular order, so feel free to move things around if you decide to tackle this, and if anyone needs additional information, I'm happy to clarify!
Thanks very much in advance.