0

I'm trying to use this code:

# library() fails loudly when the package is missing; require() would only
# return FALSE and let the script break later on.
library(RSelenium)

# checkForServer()/startServer() are defunct in current RSelenium releases.
# rsDriver() starts the Selenium server and opens a browser session in one
# call; keeping its result as `rD` also gives the teardown code a handle on
# the server via rD[["server"]].
rD <- rsDriver()
remDr <- rD[["client"]]

appURL <- 'http://www.mtmis.excise-punjab.gov.pk'
remDr$navigate(appURL)

# Type the registration number into the form field named "vhlno".
remDr$findElement(using = "name", value = "vhlno")$sendKeysToElement(
  list("ria-07-777")
)

I can't figure out the CSS selector for the search button, so I click it by class name instead:

# Collect every element carrying the "ent-button-div" class and click the
# first one (presumably the search/submit button -- confirm against the page).
matched_buttons <- remDr$findElements(using = "class", value = "ent-button-div")
matched_buttons[[1]]$clickElement()

After submitting the search query, I read the result table:

# Fetch the element with class "result-div" and take its full HTML markup.
elem <- remDr$findElement(using = "class", value = "result-div")
elemtxt <- elem$getElementAttribute("outerHTML")[[1]]

# The XML package is never attached in this script, so its functions are
# called through the namespace. TRUE is spelled out because `T` is an
# ordinary variable that can be reassigned.
elemxml <- XML::htmlTreeParse(elemtxt, useInternalNodes = TRUE)
final <- XML::readHTMLTable(elemxml)

remDr$close()

# `rD` only exists when the session was started with rsDriver(); skip the
# server shutdown otherwise instead of failing on an undefined object.
if (exists("rD")) {
  rD[["server"]]$stop()
}

What I want is an automated "for loop" that runs this query for each vehicle in a list and merges all the results into one final table, with each row carrying a unique identifier such as "ria-07-777".

# Registration numbers to look up, one query per element.
# NOTE(review): the variable name `list` reuses the name of base R's list()
# function -- consider a more specific name.
list <- c("ria-07-776", "ria-07-777", "ria-07-778")
Samuel
  • 2,895
  • 4
  • 30
  • 45
Janjua
  • 235
  • 2
  • 13

1 Answer

1

Why do you need Selenium?

library(httr)
library(rvest)

#' Turn free-text labels into unique snake_case column names
#'
#' Lower-cases the labels, replaces every run of punctuation or whitespace
#' with a single underscore, strips a leading or trailing underscore, and
#' finally de-duplicates the names by appending `_1`, `_2`, ... to repeats.
#'
#' @param x A character vector of raw labels.
#' @return A character vector of the same length holding the cleaned names.
clean_cols <- function(x) {
  # Each rule is c(pattern, replacement); rules are applied in this order.
  rules <- list(
    c("[[:punct:][:space:]]+", "_"),
    c("_+", "_"),
    c("(^_|_$)", "")
  )

  cleaned <- Reduce(
    function(labels, rule) gsub(rule[1], rule[2], labels),
    rules,
    tolower(x)
  )

  make.unique(cleaned, sep = "_")
}

#' Look up a single vehicle on the Punjab MTMIS site
#'
#' Posts the registration number to the site's search form and converts the
#' label/value rows of the returned result table into a data frame.
#'
#' @param vhlno A single registration number, e.g. `"ria-07-777"`.
#' @return A data frame with one column per label found in the result table
#'   (names cleaned by `clean_cols()`), all columns character. When the page
#'   contains no result rows a warning is raised and the data frame is empty.
get_vehicle_info <- function(vhlno) {

  stopifnot(length(vhlno) == 1L)

  res <- POST(
    url = 'http://www.mtmis.excise-punjab.gov.pk/',
    set_cookies(has_js = 1),
    body = list(vhlno = vhlno)
  )

  # Convert HTTP error statuses into R errors before parsing anything.
  stop_for_status(res)

  pg <- content(res)

  # Rows of the table directly under the "result-div" container that have at
  # least one cell without a colspan attribute.
  rows <- html_nodes(
    pg,
    xpath = ".//div[contains(@class, 'result-div')]/table/tr[td[not(@colspan)]]"
  )

  if (length(rows) == 0L) {
    warning("No result rows found for vehicle '", vhlno, "'", call. = FALSE)
  }

  # First cell of each row is the label, second cell is the value.
  labels <- clean_cols(html_text(html_nodes(rows, xpath = ".//td[1]")))
  values <- html_text(html_nodes(rows, xpath = ".//td[2]"))

  # A count mismatch means labels and values no longer line up, which would
  # silently attach values to the wrong column names.
  if (length(labels) != length(values)) {
    stop(
      "Result table for vehicle '", vhlno, "' has ", length(labels),
      " labels but ", length(values), " values",
      call. = FALSE
    )
  }

  cbind.data.frame(
    as.list(setNames(values, labels)),
    stringsAsFactors = FALSE
  )

}

Now use the function defined above:

vehicles <- c("ria-07-776", "ria-07-777", "ria-07-778")

# One data frame per registration number, fetched sequentially.
vehicle_rows <- lapply(vehicles, function(v) {
  # Pause before every request: your desire to steal a bunch of vehicle info
  # to make a sketch database does not give you the right to hammer the
  # server, and you'll very likely remove this line anyway, but I had to try.
  Sys.sleep(5)
  get_vehicle_info(v)
})

# Bind all rows in one call instead of growing the result pairwise.
vehicle_df <- do.call(rbind.data.frame, vehicle_rows)

str(vehicle_df)
## 'data.frame': 3 obs. of  12 variables:
##  $ registration_number: chr  "ria-07-776" "ria-07-777" "ria-07-778"
##  $ chassis_number     : chr  "KZJ95-0019869" "NFBFD15746R101101" "NZE1206066278"
##  $ engine_number      : chr  "1KZ-0375851" "R18A11981105" "X583994"
##  $ make_name          : chr  "LAND - CRUISER" "HONDA - CIVIC" "TOYOTA - COROLLA"
##  $ registration_date  : chr  "17-Dec-2007 12:00 AM" "01-Aug-2007 12:00 AM" "01-Jan-1970 12:00 AM"
##  $ model              : chr  "1997" "2006" "2007"
##  $ vehicle_price      : chr  "1,396,400" "1,465,500" "0"
##  $ color              : chr  "MULTI" "GRENDA B.P" "SILVER"
##  $ token_tax_paid_upto: chr  "June 2015" "June 2011" "June 2016"
##  $ owner_name         : chr  "FATEH DIN AWAN" "M BILAL YASIN" "MUHAMMAD ALTAF"
##  $ father_name        : chr  "HAFIZ ABDUL HAKEEM AWAN" "CH M. YASIN" "NAZAR MUHAMMAD"
##  $ owner_city         : chr  "RAWALPINDI" "ISLAMABAD" "SARGODHA"

You'll need to handle network and scraping errors on your own. I can't justify any more time for this likely unethical endeavour (the answer was more to help others with similar questions).

hrbrmstr
  • 77,368
  • 11
  • 139
  • 205