1

I often report on multiple factor variables. I want a space-efficient summary table of these variables -- that is, I'd like to report on multiple factors that share the same levels without repeating the level labels for each variable. Here are my data:

# Example data: six items (answer3 ... answer8), five rows each.
# Every item is a "labelled" factor that shares the same five levels and
# carries its question text in the "label" attribute.
df <- local({
  scale_levels <- c(
    "Strongly agree", "Agree", "Neutral", "Disagree", "Strongly disagree"
  )

  # Build one item from its integer level codes and its question text.
  make_item <- function(codes, label) {
    structure(
      codes,
      levels = scale_levels,
      label = label,
      class = c("labelled", "factor")
    )
  }

  structure(
    list(
      answer3 = make_item(
        c(NA, 2L, NA, 1L, 2L), "Confident in math class"
      ),
      answer4 = make_item(
        c(NA, 2L, NA, 2L, 2L), "Strong belong scientific community"
      ),
      answer5 = make_item(
        c(NA, 5L, NA, 2L, 3L), "Think myself a scientist"
      ),
      answer6 = make_item(
        c(NA, 3L, NA, 1L, 3L), "Important to learn concepts"
      ),
      answer7 = make_item(
        c(NA, 2L, NA, 3L, 2L), "Goal learn as much as I can"
      ),
      answer8 = make_item(
        c(NA, 1L, NA, 3L, 2L), "Later changes depend on doing well"
      )
    ),
    row.names = c(NA, -5L),
    class = c("tbl_df", "tbl", "data.frame")
  )
})

The first approach here results in repeated labels:

# Column-percent table with every answer variable passed to tab_cells().
# This approach repeats the level values for each variable.
df %>%
  tab_cells(answer3, answer4, answer5, answer6, answer7, answer8) %>%
  tab_rows() %>%
  tab_stat_cpct() %>%
  tab_pivot()

The second approach pivots the data, but pivoting strips the labels:

    # Reshape the answer columns to long form, then tabulate `value` against
    # the originating `variable` and transpose the pivoted table.
    # Note: the variable labels are erased by the reshape step.
    df %>%
      # TRUE instead of T: T is an ordinary variable that can be reassigned
      to_long(cols = starts_with("answ"), value_factor = TRUE) %>%
      tab_cells(value) %>%
      tab_cols(variable) %>%
      tab_stat_cpct() %>%
      tab_pivot() %>%
      tab_transpose()

How can I get the pivoted variables without losing the labels?

Ben
  • 1,113
  • 10
  • 26

2 Answers

1

I came up with this convoluted, albeit functional, solution.
Note that I want to stick with expss as my reporting approach given its general utility and applicability to many of my projects.

Please comment with more efficient approaches...

 # extract and save the labels for each variable:
 # one row per column of df, with the column name and its "label" attribute
 myVarnames <- map_chr(df, ~ attr(.x, "label")) %>%
   bind_cols(names = names(df), question = .)

 # remove attributes prior to pivoting.
 # Work on a copy of df so that `temp` exists before it is modified; with the
 # attributes gone, each factor column is reduced to its integer level codes.
 temp <- df
 temp[] <- lapply(temp, function(x) {
   attributes(x) <- NULL
   x
 })

 # pivot the stripped copy (not df), so `value` holds the integer codes that
 # the factor() call below expects
 temp <- temp %>% pivot_longer(cols = starts_with("answ"))

 # reapply the attributes as column
 temp <- left_join(temp, myVarnames, by = c("name" = "names"))

 # reapply factor params
 temp$value <- factor(
   temp$value,
   levels = c(1, 2, 3, 4, 5),
   labels = c(
     "Strongly agree", "Agree", "Neutral", "Disagree", "Strongly disagree"
   ),
   ordered = TRUE
 )

 # finally, use `expss` to create the output table:
 t <- temp %>%
   tab_cells(question) %>%
   tab_cols(value) %>%
   tab_stat_cpct() %>%
   tab_pivot() %>%
   as_huxtable() # The result is formatted well for Word and PDF.
Ben
  • 1,113
  • 10
  • 26
0

I can only suggest a special function for stacking:

# Stack the selected columns of `df` into a long two-column data.frame while
# keeping each column's variable label.
#
# Arguments:
#   df    a data.frame (or tibble) whose columns may carry variable labels.
#   cols  which columns to stack: NULL (default) selects all columns; a
#         function is called on colnames(df) and its result is used as the
#         column index; anything else is used directly as a column index.
#
# Returns a data.frame with nrow(df) rows per selected column and two columns:
#   variable  the variable label of the source column ("|" when it has none)
#   value     the stacked values of the selected columns
#
# NOTE(review): var_lab(), `var_lab<-` and as.labelled() are not defined here;
# presumably they come from expss -- confirm the package is attached.
stack_with_labels <- function(df, cols = NULL) {
  if (is.function(cols)) {
    cols <- cols(colnames(df))
  }
  if (is.null(cols)) {
    cols <- TRUE
  }
  # drop = FALSE keeps a data.frame even when exactly one column is selected;
  # otherwise a base data.frame collapses to a bare vector and the lapply()
  # calls below would iterate over its elements instead of over columns.
  need_cols <- df[, cols, drop = FALSE]
  all_var_labs <- lapply(need_cols, var_lab)
  # check for empty labels
  no_var_lab <- lengths(all_var_labs) == 0
  all_var_labs[no_var_lab] <- "|"
  # one label per stacked row, in the same column-by-column order as `value`
  all_var_labs <- rep(unlist(all_var_labs), each = nrow(df))
  need_cols <- lapply(need_cols, as.labelled)
  value <- do.call(c, need_cols)
  res <- data.frame(variable = all_var_labs, value = value)
  var_lab(res) <- "|"
  res
}


# Select the answer columns, stack them with stack_with_labels(), then
# tabulate `value` against `variable` and transpose the pivoted table.
df %>%
  columns("^answ") %>%
  stack_with_labels() %>%
  tab_cells(value) %>%
  tab_cols(variable) %>%
  tab_stat_cpct() %>%
  tab_pivot() %>%
  tab_transpose()
Gregory Demin
  • 4,596
  • 2
  • 20
  • 20