2

I have a dataset called `ballons` with two clusters: TRUE and FALSE. I want to count the frequency of each modality (factor level) per cluster and per column, so I tried:

library(ggplot2)
library(tidyverse)

# Example data: 19 rows, four two-level factor columns plus the logical
# cluster indicator `T`. Each factor is built from its integer codes
# (1 = first label, 2 = second label).
ballons <- data.frame(
  YELLOW = factor(
    c(2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L,
      1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L),
    levels = 1:2,
    labels = c("PURPLE", "YELLOW")
  ),
  SMALL = factor(
    c(2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 2L,
      2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L),
    levels = 1:2,
    labels = c("LARGE", "SMALL")
  ),
  STRETCH = factor(
    c(2L, 2L, 1L, 1L, 2L, 2L, 2L, 1L, 1L, 2L,
      2L, 2L, 1L, 1L, 2L, 2L, 2L, 1L, 1L),
    levels = 1:2,
    labels = c("DIP", "STRETCH")
  ),
  ADULT = factor(
    c(1L, 2L, 1L, 2L, 1L, 1L, 2L, 1L, 2L, 1L,
      1L, 2L, 1L, 2L, 1L, 1L, 2L, 1L, 2L),
    levels = 1:2,
    labels = c("ADULT", "CHILD")
  ),
  T = c(
    TRUE, FALSE, FALSE, FALSE, TRUE, TRUE, FALSE, FALSE, FALSE, TRUE,
    TRUE, FALSE, FALSE, FALSE, TRUE, TRUE, FALSE, FALSE, FALSE
  )
)

# Split the data into one tibble per value of the cluster column `T`.
r1 <- ballons %>%
  group_by(T) %>%
  group_split()

print(r1)


# Tabulate every column within each group, then stack the per-group results.
data.frame(
  do.call(
    rbind,
    lapply(r1, function(grp) sapply(grp, table))
  )
)


  YELLOW SMALL STRETCH ADULT  T
1   6, 6  6, 6    8, 4  4, 8 12
2   4, 3  4, 3    0, 7  7, 0  7    

However, I'm looking for this representation (expected output):

      YELLOW PURPLE SMALL LARGE ...
FALSE   6    6      6      6    ...
TRUE    4    3      4      3    etc

I also tried without success:

# Frequency tables of every column, computed separately for each group.
lapply(r1, function(grp) sapply(grp, table))
# The same per-group tables stacked row-wise and wrapped in a data frame.
data.frame(do.call(rbind, lapply(r1, function(grp) sapply(grp, table))))
# Cross-tabulate each column of `ballons` against the cluster column.
sapply(ballons, table, ballons[["T"]])
Brian Tompsett - 汤莱恩
  • 5,753
  • 72
  • 57
  • 129
Tou Mou
  • 1,270
  • 5
  • 16
  • 2
    Not specific to this question, but calling a column `T` might cause some headaches at some point, either by confusing functions or confusing yourself, since it can be used as a shorthand to evaluate to `TRUE` – camille Jun 16 '21 at 16:40

3 Answers3

3

You are already using the tidyverse, so you can use a tidy solution:

# Stack all non-cluster columns into a single `value` column, then
# cross-tabulate the cluster `T` against the stacked values.
ballons %>%
  pivot_longer(cols = !c(T)) %>%
  select(T, value) %>%
  table()
Sandwichnick
  • 1,379
  • 6
  • 13
2

Another approach could be to use janitor::tabyl which is useful for tables:

library(janitor)
# Two-way frequency table: rows are values of `T`, columns are levels of YELLOW.
ballons %>% tabyl(T, YELLOW)
#      T PURPLE YELLOW
#  FALSE      6      6
#   TRUE      4      3

Now loop over all columns and then join output:

# Build one T-by-column frequency table for every column except the fifth
# (the cluster column), then join all tables on `T`.
names(ballons[-5]) %>%
  map(function(col) tabyl(dat = ballons, T, !!sym(col))) %>%
  reduce(full_join, by = "T")
#      T PURPLE YELLOW LARGE SMALL DIP STRETCH ADULT CHILD
#  FALSE      6      6     6     6   8       4     4     8
#   TRUE      4      3     4     3   0       7     7     0

`!!sym(...)` turns each column name (a string) into a symbol and unquotes it, so that `tabyl` interprets it as a column of the data; see @Lionel Henry's explanation here and @Moody_Mudskipper's example with tabyl here.

user63230
  • 4,095
  • 21
  • 43
1
library(tidyverse)

# Reshape to long form (one row per original cell), then count how often each
# level occurs within each value of `T` and spread the levels into columns.
ballons %>%
  pivot_longer(-T) %>%
  # `id_cols` is named explicitly: passing it by position is deprecated
  # since tidyr 1.3.0 and emits a warning.
  pivot_wider(
    id_cols = T,
    names_from = value,
    values_fn = length, # count the rows in each T-by-level cell
    values_fill = 0     # level never observed for this value of `T`
  )
#> # A tibble: 2 x 9
#>   T     YELLOW SMALL STRETCH ADULT CHILD   DIP LARGE PURPLE
#>   <lgl>  <int> <int>   <int> <int> <int> <int> <int>  <int>
#> 1 TRUE       3     3       7     7     0     0     4      4
#> 2 FALSE      6     6       4     4     8     8     6      6
Yuriy Saraykin
  • 8,390
  • 1
  • 7
  • 14