0

I am trying to create a function to get a output like below but getting different result.

I want table only for Col1,col2,col3 with regular expression as input parameter.

the output i am getting like below which is little strange and also showing data from text column which is not required.

enter image description here

Required output should look like below.

enter image description here

library(expss)

data<-data.frame(
  gender = c(1,2,1,2,1,2,1,2,2,2,2,1,1,2,2,2,2,1,1,1,1,1,2,1,2,1,2,2,2,1,2,1,2,1,2,1,2,2,2),
  sector = c(3,3,1,2,5,4,4,4,4,3,3,4,3,4,2,1,4,2,3,4,4,4,3,1,2,1,5,5,4,3,1,4,5,2,3,4,5,1,4),
  col_1=c(1,1,2,0,2,0,0,2,1,0,0,2,0,3,0,3,0,1,0,3,0,1,1,2,0,1,1,3,0,3,0,1,2,0,3,0,1,0,1),
  col_2=c(1,1,1,1,1,0,3,3,2,1,1,1,2,1,0,2,0,1,2,1,0,1,2,1,1,1,0,2,0,1,1,2,1,1,1,1,2,0,0),
  col_3=c(1,1,0,0,0,0,2,1,3,2,0,3,0,2,0,2,1,0,2,0,2,0,1,3,1,0,0,0,1,0,3,1,1,1,1,1,3,0,1),
  col_Text=c(NA,NA,NA,"we",NA,NA,NA,NA,"we",NA,NA,NA,NA,"we",NA,NA,NA,NA,"we",NA,NA,NA,NA,"we",NA,NA,NA,NA,"we",NA,"se",NA,NA,"we",NA,"te","we","te",NA),
  coll.4=c(1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0),
  coll.5=c(1,2,1,1,1,2,1,2,2,1,2,1,1,1,2,2,2,1,1,1,2,1,2,1,1,1,2,2,2,1,1,2,2,1,1,1,2,2,2)
)

data$col_1<-factor(data$col_1, levels=c(1,2,3,0), labels=c("sale","Ops","MGMT","Mark"))
data$col_2<-factor(data$col_2, levels=c(1,2,3,0), labels=c("sale","Ops","MGMT","Mark"))
data$col_3<-factor(data$col_3, levels=c(1,2,3,0), labels=c("sale","Ops","MGMT","Mark"))
data$coll.4<-factor(data$coll.4, levels=c(1,0), labels=c("USA","CA"))
data$coll.5<-factor(data$coll.5, levels=c(1,0), labels=c("Local","Regional"))

data$gender<-factor(data$gender, levels=c(1,2), labels=c("Male","female"))
data$sector<-factor(data$sector, levels=c(1,2,3,4,5), labels=c("TX","CA","NY","LA","WA"))


data$gender1 <- ifelse(data$gender == "Male",1, NA)
data$total <- ifelse(data$coll.5 == "Local",1, NA)

val_lab(data$gender1)<-c("GENDER"=1)
val_lab(data$total)<-c("All Market"=1)

lkl <- with(data,list(total,gender1))


fun1<- function(dataset,pattern,banner,label){
  print(pattern)
  npatt<-paste0(pattern, ".*(?<!_TEXT)$")
  T1 = dataset %>% 
    tab_rows(banner)
  lab<-paste0(pattern, "[fun1]:", label)
  for(each_var in npatt){
    T1 = T1 %>% 
      tab_cells("|" = mrset_p(each_var)) %>% 
      tab_stat_cpct(label = lab)
  }
  T1 %>% 
    tab_pivot(stat_position = "inside_columns") %>% 
    tab_transpose()}


t1 <- fun1(dataset=data, pattern="col_",banner=lkl,label="Table 1")

samrr_tr
  • 81
  • 8
  • You provide non-reproducible example. Your `fun1` has three arguments, but you call it with four arguments. And there are some other bugs. As for your main question - you don't need `get` with `mrset_p`. `mrset_p` already uses character argument to find variables by pattern. – Gregory Demin May 17 '22 at 22:53
  • @GregoryDemin I have updated the question , please check and help what i am doing wrong. – samrr_tr May 21 '22 at 18:15
  • In fun1 you take .... As a parameter. Three dots is the parameter. Four dots is nothing. It might sound picky but it shows you haven't even tried the code you're providing in a clean session. If you can't put in that effort why should we help? – Dason May 21 '22 at 18:31
  • @samrr_tr Now example works inspite of `....` argument. But I don't understand which statistic do you want to calculate? `*_cpct` calculates column percent, e. g. ratio of number of occurance of one value to the number of cases in the dataset. Your example doesn't look like this stat. – Gregory Demin May 21 '22 at 21:24
  • In required output table i just put random numbers you can ignore those numbers. i required tab_stat_cpct calculations only @GregoryDemin – samrr_tr May 22 '22 at 17:15
  • @samrr_tr You expect 'col_1', 'col_2', ... labels, but `mrset` will give you "sale","Ops","MGMT"... labels. If you really want 'col's then you need `mdset`. But `mdset` counts only 1's. `m*` is for multiple choice variables. They can be with binary ('md') or code encoding ('mr'). You try to use both and it is impossible. So it is unclear for me which result do you want. – Gregory Demin May 23 '22 at 21:23
  • what are the other options to display this kid of table in other ways...?? I am ok if there are any changes fits with my requirements. @GregoryDemin I mean what i need to change in my function but i need the input parameters as it is. – samrr_tr May 24 '22 at 15:34

1 Answers1

0
fun1<- function(dataset,pattern,banner,label){
    print(pattern)
    vars = grep(pattern, colnames(dataset), value = TRUE)
    T1 = dataset %>% 
        tab_rows(banner)
    lab<-paste0("[fun1]:", label)
    for(each_var in vars){
        T1 = T1 %>% 
            tab_cells("|" = ..p(each_var)) %>% 
            tab_cols(total(label = "|")) %>% 
            tab_stat_cpct(label = paste0(lab, "|",  each_var), total_row_position = "none")
    }
    T1 %>% 
        tab_pivot(stat_position = "inside_columns") %>% 
        tab_transpose()
    }


t1 <- fun1(dataset=data, pattern="^col_[0-9]",banner=lkl,label="Table 1")
t1

# |                |       | All Market |      |      |      | GENDER |      |      |      |
# |                |       |       sale |  Ops | MGMT | Mark |   sale |  Ops | MGMT | Mark |
# | -------------- | ----- | ---------- | ---- | ---- | ---- | ------ | ---- | ---- | ---- |
# | [fun1]:Table 1 | col_1 |       19.0 | 19.0 | 19.0 | 42.9 |   29.4 | 23.5 | 11.8 | 35.3 |
# |                | col_2 |       85.7 |  9.5 |  4.8 |      |   70.6 | 17.6 |  5.9 |  5.9 |
# |                | col_3 |       23.8 | 19.0 | 14.3 | 42.9 |   23.5 | 17.6 | 11.8 | 47.1 |

Gregory Demin
  • 4,596
  • 2
  • 20
  • 20
  • Just one more thing if the column name are like cc4.1_2,cc4.1_3,cc4.1_4,cc4.1_5,cc4.1_Text..... and i want to give them as a parameter like "cc4.1_" and it can select all columns matching this but except columns like cc4.1_Text. what should i change here ...?? now it taking column names like cc42.1_5,cc455.1_5,cc441.1_5 also – samrr_tr May 31 '22 at 13:21
  • @samrr_tr It will look like this: `"^cc4\\.1_[0-9]+"` – Gregory Demin May 31 '22 at 13:38
  • Thanks for the help but just one question i want to give input parameter *pattern* like "cc4.1_" and i want it can select all vars from it c(cc4.1_2,cc4.1_2,cc4.1_3,cc4.1_4,cc4.1_5). and i tried like below but its taking cc42.1_2,cc40.1_2,cc487.1_2 etc. – samrr_tr Jun 01 '22 at 16:03
  • Fun1<- function(dataset,pattern,banner,label){ print(pattern) npatt<-paste0(pattern, "\\d+") vars = grep(npatt, colnames(dataset), value = TRUE) T1 = dataset %>% tab_rows(banner) lab<-paste0("[Freq_sa_row]:", label) for(each_var in vars){ T1 = T1 %>% tab_cells("|" = ..p(each_var)) %>% tab_cols(total(label = "|")) %>% tab_stat_cpct(label = paste0(lab, "|", each_var), total_row_position = "none") } T1 %>% tab_pivot(stat_position = "inside_columns") %>% tab_transpose() } – samrr_tr Jun 01 '22 at 16:04