7

I have a dataframe with sample classifications:

 Seq_ID   Family Father   Mother   Sex    Role    Type  
   <chr>     <dbl> <chr>    <chr>    <chr>  <chr>   <chr> 
 1 SSC02219 11000. 0        0        Male   Father  Parent
 2 SSC02217 11000. 0        0        Female Mother  Parent
 3 SSC02254 11000. SSC02219 SSC02217 Male   Proband Child 
 4 SSC02220 11000. SSC02219 SSC02217 Female Sibling Child 
 5 SSC02184 11001. 0        0        Male   Father  Parent
 6 SSC02181 11001. 0        0        Female Mother  Parent
 7 SSC02178 11001. SSC02184 SSC02181 Male   Proband Child 
 8 SSC03092 11002. 0        0        Male   Father  Parent
 9 SSC03078 11002. 0        0        Female Mother  Parent
10 SSC03070 11002. SSC03092 SSC03078 Female Proband Child 

Currently, to go from a to b, I have to do this:

library(tidyverse)
library(janitor)

sample.df %>% tabyl(Role, Sex) %>% 
  adorn_totals(where=c("row", "col") ) %>% 
  as.tibble() %>% select(1,4,3,2) %>%
  # Part 2
  mutate(type=c("parent", "parent", "child", "child", " ")) %>% 
  inner_join(., group_by(., type) %>% 
  summarise(total=sum(Total))) %>% 
  select(5,6,1,2,3,4)

I feel like this is such a workaround for something very simple. Is there a more direct way to do the second part in dplyr?

a enter image description here

b enter image description here

www
  • 38,575
  • 12
  • 48
  • 84
Carmen Sandoval
  • 2,266
  • 5
  • 30
  • 46

2 Answers2

2

Here is an option. as.tibble is not necessary. mutate with case_when is more manageable when you have a lot of classes to assign to "parent" or "child". inner_join is not required as we can use group_by and mutate to calculate the total. Finally, I like to write down the column names when I use the select function as it will easier for me to read in the future, but you can of course use column indices as long as you are confident that the column indices would be unchanged no matter what new analyses you may include in your pipe operation.

library(tidyverse)
library(janitor)

sample.df %>% 
  tabyl(Role, Sex) %>% 
  adorn_totals(where=c("row", "col")) %>% 
  select(Role, Total, Male, Female) %>%
  # Part 2
  mutate(type = case_when(
    Role %in% c("Mother", "Father")      ~"parent",
    Role %in% c("Proband", "Sibling")    ~"child",
    TRUE                                 ~" "
  )) %>% 
  group_by(type) %>% 
  mutate(total = sum(Total)) %>%
  ungroup() %>%
  select(type, total, Role, Total, Male, Female)
# # A tibble: 5 x 6
#   type   total Role    Total  Male Female
#   <chr>  <dbl> <chr>   <dbl> <dbl>  <dbl>
# 1 parent    6. Father     3.    3.     0.
# 2 parent    6. Mother     3.    0.     3.
# 3 child     4. Proband    3.    2.     1.
# 4 child     4. Sibling    1.    0.     1.
# 5 " "      10. Total     10.    5.     5.

DATA

library(tidyverse)
library(janitor)

sample.df <- read.table(text = "Seq_ID   Family Father   Mother   Sex    Role    Type  
 1 SSC02219 11000  0        0        Male   Father  Parent
 2 SSC02217 11000  0        0        Female Mother  Parent
 3 SSC02254 11000  SSC02219 SSC02217 Male   Proband Child 
 4 SSC02220 11000  SSC02219 SSC02217 Female Sibling Child 
 5 SSC02184 11001  0        0        Male   Father  Parent
 6 SSC02181 11001  0        0        Female Mother  Parent
 7 SSC02178 11001  SSC02184 SSC02181 Male   Proband Child 
 8 SSC03092 11002  0        0        Male   Father  Parent
 9 SSC03078 11002  0        0        Female Mother  Parent
10 SSC03070 11002  SSC03092 SSC03078 Female Proband Child ",
                        header = TRUE, stringsAsFactors = FALSE)

sample.df <- as_tibble(sample.df)
www
  • 38,575
  • 12
  • 48
  • 84
1

Another option could be using knitr

library(janitor)
library(tidyverse)
library(kableExtra)
library(knitr)

sample.df %>% 
  tabyl(Role, Sex) %>%
  adorn_totals(where=c("row", "col")) %>%
  # Part 2
  mutate(type=case_when(
    Role %in% c('Father', 'Mother') ~ 'parent',
    Role %in% c('Proband', 'Sibling') ~ 'child',
    TRUE ~ ''
  )) %>%
  group_by(type) %>%
  mutate(total=sum(Total)) %>%
  ungroup() %>%
  kable("html") %>%
  kable_styling(c("striped", "bordered")) %>%
  collapse_rows(columns = c(5,6))

Output is:

enter image description here

Sample data:

sample.df  <- structure(list(Seq_ID = c("SSC02219", "SSC02217", "SSC02254", 
"SSC02220", "SSC02184", "SSC02181", "SSC02178", "SSC03092", "SSC03078", 
"SSC03070"), Family = c(11000L, 11000L, 11000L, 11000L, 11001L, 
11001L, 11001L, 11002L, 11002L, 11002L), Father = c("0", "0", 
"SSC02219", "SSC02219", "0", "0", "SSC02184", "0", "0", "SSC03092"
), Mother = c("0", "0", "SSC02217", "SSC02217", "0", "0", "SSC02181", 
"0", "0", "SSC03078"), Sex = c("Male", "Female", "Male", "Female", 
"Male", "Female", "Male", "Male", "Female", "Female"), Role = c("Father", 
"Mother", "Proband", "Sibling", "Father", "Mother", "Proband", 
"Father", "Mother", "Proband"), Type = c("Parent", "Parent", 
"Child", "Child", "Parent", "Parent", "Child", "Parent", "Parent", 
"Child")), row.names = c("1", "2", "3", "4", "5", "6", "7", "8", 
"9", "10"), class = c("tbl_df", "tbl", "data.frame"))
Prem
  • 11,775
  • 1
  • 19
  • 33