0

R: How to plot rpivotTable or dcast table with summarised column in between same as excel for reporting of continues data.

Check attached screenshot and data set, tried different way to add summarised column in dcast and rpivottable but not getting it.

Sample Data set be like.

Buyer       year_month      Late_Days

A           2018-01         0 or Early
B           2018-01         >=5
C           2018-02         >=10
A           2018-04         0 or Early
A           2018-03         >=5
B           2018-03         >=10
C           2018-05         0 or Early
A           2018-06         >=5
B           2018-07         >=10
A           2018-11         0 or Early
B           2018-11         >=5
A           2019-01         >=10
B           2019-01         0 or Early
A           2019-01         >=5
A           2019-02         >=10

Result of dput(DF)

year_month("12-2019", "01-2020", "01-2020", "06-2018", "08-2018", "09-2018", "12-2018", "03-2019",
 "11-2016", "11-2016", "04-2019", "07-2017", "08-2017", "09-2017", "10-2017", "11-2017", "12-2017",
 "01-2018", "02-2018","03-2018", "04-2018", "05-2018", "06-2018", "07-2018", "08-2018", "09-2018",
"07-2017", "08-2017", "09-2017", "10-2017", "11-2017", "12-2017", "01-2018", "02-2018","03-2018",
 "04-2018", "05-2018", "06-2018", "07-2018", "08-2018", "09-2018","08-2017", "09-2017", "03-2018",
 "04-2019") Late_Days = c("<=20", "<=10", "<=5", "<=20", "0 or early", "0 or early", "0 or early", 
"0 or early", "0 or early", "<=30", "0 or early", "<=10", "<=5", 
"0 or early", "0 or early", "0 or early", ">30", "<=20", "<=20", 
"<=20", "<=20", ">30", "<=20", "<=5", "<=5", "<=5", "<=5", "<=10", 
"<=10", ">30", "<=5", "0 or early", "<=5", "0 or early", "0 or early", 
"0 or early", "<=10", "<=5", "0 or early", "0 or early", "0 or early"
 "<=20", "<=5", "<=5", "<=5")Buyer = c( 
"C", "D", "D", "D", "D", "A", "D", "A", "A", "C", "A", "A", "A", "A", "A", 
"A", "A", "A", "A", "A", "A", "A", "A", "C", "C", "A", "C", "A", "A", "A", 
"A", "B", "B", "A", "A", "B", "B", "B", "A", "A", "C", "C", "A", "A", "A", 
)row.names = c(1L, 2L, 3L, 4L, 
5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L, 16L, 17L, 18L, 
19L, 20L, 21L, 22L, 23L, 24L, 25L, 26L, 27L, 28L, 29L, 20L, 31L, 
32L, 33L, 34L, 35L, 36L, 37L, 38L, 39L, 40L, 41L, 42L, 43L, 44L, 45L)

Code i tried

datatable(dcast(inventory, Buyer ~ year_month), filter = 'top')

Check the screenshot of excel report. is it possible to generate in R. enter image description here

mnist
  • 6,571
  • 1
  • 18
  • 41
Nirav Prajapati
  • 265
  • 2
  • 15
  • There is an error in your `dput(DF)`, several informations are missing. For example this is what the `dput()` of the first rows (with `head()`) of the `iris` dataset looks like : `structure(list(Sepal.Length = c(5.1, 4.9, 4.7, 4.6, 5, 5.4), Sepal.Width = c(3.5, 3, 3.2, 3.1, 3.6, 3.9), Petal.Length = c(1.4, 1.4, 1.3, 1.5, 1.4, 1.7), Petal.Width = c(0.2, 0.2, 0.2, 0.2, 0.2, 0.4), Species = structure(c(1L, 1L, 1L, 1L, 1L, 1L), .Label = c("setosa", "versicolor", "virginica"), class = "factor")), row.names = c(NA, 6L), class = "data.frame")` – cbo Mar 09 '20 at 13:09

1 Answers1

0

I was not suer what to sum with the strings in your Late_Days-column.. so I replaced the strings with some random numerics (see code below)

** sample data provided **

df <- data.frame(year_month = c("12-2019", "01-2020", "01-2020", "06-2018", "08-2018", "09-2018", "12-2018", "03-2019",
                           "11-2016", "11-2016", "04-2019", "07-2017", "08-2017", "09-2017", "10-2017", "11-2017", "12-2017",
                           "01-2018", "02-2018","03-2018", "04-2018", "05-2018", "06-2018", "07-2018", "08-2018", "09-2018",
                           "07-2017", "08-2017", "09-2017", "10-2017", "11-2017", "12-2017", "01-2018", "02-2018","03-2018",
                           "04-2018", "05-2018", "06-2018", "07-2018", "08-2018", "09-2018","08-2017", "09-2017", "03-2018",
                           "04-2019"), 
                 Late_Days = c("<=20", "<=10", "<=5", "<=20", "0 or early", "0 or early", "0 or early", 
                                                    "0 or early", "0 or early", "<=30", "0 or early", "<=10", "<=5", 
                                                    "0 or early", "0 or early", "0 or early", ">30", "<=20", "<=20", 
                                                    "<=20", "<=20", ">30", "<=20", "<=5", "<=5", "<=5", "<=5", "<=10", 
                                                    "<=10", ">30", "<=5", "0 or early", "<=5", "0 or early", "0 or early", 
                                                    "0 or early", "<=10", "<=5", "0 or early", "0 or early", "0 or early",
                                                    "<=20", "<=5", "<=5", "<=5"),
                 Buyer = c( "C", "D", "D", "D", "D", "A", "D", "A", "A", "C", "A", "A", "A", "A", "A", 
                            "A", "A", "A", "A", "A", "A", "A", "A", "C", "C", "A", "C", "A", "A", "A", 
                            "A", "B", "B", "A", "A", "B", "B", "B", "A", "A", "C", "C", "A", "A", "A" ),
                 stringsAsFactors = FALSE
                 )

code

library( data.table )
library( janitor )
#set data to data.table format
data.table::setDT(df)
#replace Late_Days with numeric, or elese we've got nothing to sum...
set.seed(123)
df[, Late_Days := sample(0:20, nrow(df), replace = TRUE) ]
#get year and month to separate columns
df[, c("month", "year") := data.table::tstrsplit( year_month, "-" ) ][]
#get yearly summarise, with totals
l <- lapply( unique(df$year), function(x) {
  temp <- df[ year == x, ]
  ans <- data.table::dcast( temp[, .(sum(Late_Days)), by = .(Buyer, year, month) ], Buyer ~ year + month, value.var = "V1", fill = 0 )
  ans <- janitor::adorn_totals(ans, where = c("col"), name = paste0( x, "_Total"))
} )
#bind together
ans <- data.table::rbindlist( l, use.names = TRUE, fill = TRUE )
#melt to ling
ans <- data.table::melt( ans, id.vars = "Buyer" )
#get summary per buyer, per period
ans <- ans[, sum(value, na.rm = TRUE), by = .(Buyer, variable)]
#cast to wide again
final <- data.table::dcast( ans, Buyer ~ variable, value.var = "V1" )
#get the order right
colorder = c("Buyer", sort( names(final)[!names(final) == "Buyer"] ) )
#reorder, and get totals by column
janitor::adorn_totals( final[, ..colorder ], where = "row" )

output

# Buyer 2016_11 2016_Total 2017_07 2017_08 2017_09 2017_10 2017_11 2017_12 2017_Total 2018_01 2018_02 2018_03 2018_04 2018_05 2018_06
# A      19         19      18      17      31      13      14       9        102       8      38       9      13      16      10
# B       0          0       0       0       0       0       0       8          8       9       0       0      20       5       1
# C      13         13      14      12       0       0       0       0         26       0       0       0       0       0       0
# D       0          0       0       0       0       0       0       0          0       0       0       0       0       0       2
# Total  32         32      32      29      31      13      14      17        136      17      38       9      33      21      13
#    2018_07 2018_08 2018_09 2018_12 2018_Total 2019_03 2019_04 2019_12 2019_Total 2020_01 2020_Total
#       4       7      28       0        133       4       9       0         13       0          0
#       0       0       0       0         35       0       0       0          0       0          0
#       6      20      11       0         37       0       0      14         14       0          0
#       0       9       0      10         21       0       0       0          0      31         31
#       10     36      39      10        226       4       9      14         27      31         31
Wimpel
  • 26,031
  • 1
  • 20
  • 37
  • Late_Days is not important here, need to count number of orders get from buyer per month_year. Check the values result getting from `datatable(dcast(df, Buyer ~ year_month), filter = 'top')` with this. and in this dacst result want to displayed year wise summary. – Nirav Prajapati Mar 11 '20 at 07:08
  • From `datatable(dcast(df, Buyer ~ year_month), filter = 'top')` Got the exact result same as displayed in excel, but unable to add year wise summarised column in between, which should be auto generated as per the get the data(month_year) from db. – Nirav Prajapati Mar 11 '20 at 07:21