1

How to get the final query in data.table format?

if not possible: how to rewrite the query from dplyr to data.table ?

library(data.table)
library(dplyr)
library(dtplyr)

#Data    
    dt = structure(list(Year = c(2015L, 2016L, 2017L, 2015L, 2016L, 2017L),
                        Item = c("Soybeans", "Soybeans", "Soybeans", "Pulses, Total", "Pulses, Total", "Pulses, Total"), 
                        Value = c(884688L, 829166L, 960640L, 2219455L, 2354696L, 2683772L)),
                   row.names = c(NA, -6L), class = "data.frame")
    
# query in  dplyr  
    dt %>% 
      group_by(Year) %>% 
      summarise(Value = sum(Value), Item = "Total") %>% 
      bind_rows(., dt) 
    
#conveert query to data.table    
    dtl=lazy_dt(dt)
    dtl %>% 
      group_by(Year) %>% 
      summarise(Value = sum(Value), Item = "Total") %>%
      bind_rows(., dtl) %>% show_query()

Error: Argument 1 must be a data frame or a named atomic vector.

Waldi
  • 39,242
  • 6
  • 30
  • 78
HerClau
  • 161
  • 2
  • 15

1 Answers1

1

Using data.table you may write this operation as -

library(data.table)

setDT(dt)
rbind(dt[, .(Value = sum(Value), Item = "Total"), Year], dt)

#   Year   Value          Item
#1: 2015 3104143         Total
#2: 2016 3183862         Total
#3: 2017 3644412         Total
#4: 2015  884688      Soybeans
#5: 2016  829166      Soybeans
#6: 2017  960640      Soybeans
#7: 2015 2219455 Pulses, Total
#8: 2016 2354696 Pulses, Total
#9: 2017 2683772 Pulses, Total
Ronak Shah
  • 377,200
  • 20
  • 156
  • 213