-1

I am trying to remove the rows of an HTML table that contain 0.0000 in specific columns, using the readHTMLTable() function from the XML package, without success. My code:

#Packages

    require(httr)
    require(XML)

#Function to read the HTML table
#and remove the rows with 0.0000 in columns 9 and 10

    # Read the first HTML table found at `URL` and drop every row whose
    # latitude or longitude (columns 9 and 10) is zero.
    #
    # Arguments:
    #   x    Unused. Kept only so that existing calls keep working.
    #   URL  Address of the HTML page that contains the table.
    #
    # Returns a data.frame whose columns are all character and are named as in
    # `col_names`, without the table's first row and without the rows that have
    # a zero coordinate.
    readFE <- function(x, URL = "") {
      col_names <- c(
        "empresa", "desc_projeto", "desc_regiao",
        "cadastrador_por", "cod_talhao", "descricao",
        "formiga_area", "qtd_destruido", "latitude",
        "longitude", "data_cadastro"
      )

      FILE <- GET(url = URL)
      tables <- getNodeSet(htmlParse(FILE), "//table")
      if (length(tables) == 0) {
        stop("No <table> element found at: ", URL, call. = FALSE)
      }

      FE_tab <- readHTMLTable(
        tables[[1]],
        header = col_names,
        colClasses = rep("character", length(col_names)),
        trim = TRUE,
        stringsAsFactors = FALSE
      )

      # The first row is discarded (presumably it repeats the header inside the
      # table body -- confirm against the page).
      results <- FE_tab[-1, , drop = FALSE]

      # The coordinates are read as text, so they are converted to numbers
      # before being compared with zero: the string "0.00000000" is not equal
      # to the string form of 0. `%in%` never yields NA, so cells that are
      # missing or not numeric simply count as "not zero" and the row is kept.
      is_zero <- function(values) {
        suppressWarnings(as.numeric(values)) %in% 0
      }
      zero_coord <- is_zero(results[[9]]) | is_zero(results[[10]])

      results[!zero_coord, , drop = FALSE]
    }

Example:

# Example call: fetch the table from the shared file and print the result.
tableFE <- readFE(
  URL = "https://www.dropbox.com/s/mb316ghr4irxipr/TALHOES_AGENTES.htm?dl=1"
)
tableFE # Doesn't work!!
Brian Tompsett - 汤莱恩
  • 5,753
  • 72
  • 57
  • 129
Leprechault
  • 1,531
  • 12
  • 28

1 Answer

0

Here's how to work with it in xml2 / rvest:

library(xml2)
library(rvest)

# Parse the saved page and take its first table, using the first row as the
# column names.
pg <- read_html("TALHOES_AGENTES.htm")
tab <- html_table(pg, header = TRUE)[[1]]

# Keep only the rows whose coordinates are both non-zero. The comparison is
# numeric because html_table() may already have converted these columns to
# numbers, in which case a comparison with the string "0.00000000" never
# matches. `%in%` never yields NA, so rows with a missing or non-numeric
# coordinate are kept rather than silently dropped by subset().
subset(
  tab,
  !(suppressWarnings(as.numeric(latitude)) %in% 0 |
      suppressWarnings(as.numeric(longitude)) %in% 0)
)
hrbrmstr
  • 77,368
  • 11
  • 139
  • 205