0

I have a list that assigns a category (type) to each column number:

  # One entry per column to be tested: `id` is the column number and `type`
  # is its category ("num" or "cat").
  list.group <- list(
    list(id = 2, type = "num"),
    list(id = 3, type = "num"),
    list(id = 4, type = "cat"),
    list(id = 5, type = "cat")
  )

I have a function that contains two tests (the Wilcoxon test and Fisher's exact test). For columns of type "num" I want the Wilcoxon test to be performed, and for columns of type "cat" I want Fisher's exact test to be performed.

First I take the list and split it into two vectors (one with the column numbers and one with the categories):

# Flatten list.group into a single vector; because every entry is
# list(id, type), the flattened values alternate id, type, id, type, ...
w <- unlist(list.group, recursive = TRUE)
w.length <- length(w)

# Odd positions carry the column ids, even positions carry the column types.
col.id <- as.integer(w[seq(from = 1, to = w.length, by = 2)])
col.type <- as.character(w[seq(from = 2, to = w.length, by = 2)])

Function:

# Pairs of column indices to compare; each element is c(first_col, second_col)
# and is used below to index the data frame by position via dat[[...]].
# NOTE(review): the example data frame shown with this code has only 5 columns,
# so indices 24 and 14 would be out of range for it -- confirm the intended data.
combination <- list(c(3,24),c(3,14))
# Keeps the rows of `df` whose GROUP is in `id_group`, splits them by GROUP,
# and for each group builds one result per pair in `combination`; the
# per-group results are combined with purrr::map_df, with the group label
# stored in a "GROUP" column.
#
# df:       data frame with a GROUP column.
# id_group: GROUP values to keep.
#
# Reads the globals `combination` and `col.type`.
wilcox.fun <- function(df, id_group){
  df = df[df$GROUP%in%id_group,]
 # Per-group worker: `dat` is the piece of `df` belonging to one GROUP value.
 x <- function(dat) { 
  do.call(rbind, lapply(combination, function(x) {
    # NOTE(review): `col.type` here is the whole global type vector, not the
    # type of the current pair, so the condition has length > 1 whenever more
    # than one column type is listed (an error in if() since R 4.2.0).
    if(col.type=="num"){
       test <- wilcox.test(dat[[x[1]]], dat[[x[2]]])}
    if(col.type=="cat"){
       test1 <- fisher.test(dat[[x[1]]], dat[[x[2]]])}
    # NOTE(review): `test` and `test1` are each assigned in only one branch,
    # but both are read below -- whichever branch did not run leaves its
    # variable undefined.
    data.frame(Test = sprintf('Group %s by Group %s Group',x[1],x[2]), 
               #W = round(test$statistic,4), 
               p = test$p.value,
               p1 = test1$p.value,
               #median=paste(x[1],median.group.1,x[2],median.group.2),
               # NOTE(review): table() output generally has more than one
               # element, so these entries may not fit alongside the
               # single-value columns above -- verify.
               nmat = table(dat[[x[1]]]),
               nmat1 = round((prop.table(table(dat[[x[1]]]), 1) * 100), 1),
               nmat2=table(dat[[x[2]]]) 
               )
  }))
 }
 return (purrr::map_df(split(df, df$GROUP),x,.id="GROUP" ))
}

data frame:

# Example input: nine rows with an integer GROUP label, three integer columns
# and one character column.
data <- data.frame(
  GROUP = c(1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L),
  col1 = c(23L, 432L, 234L, 234L, 3123L, 657L, 8768L, 123L, 42323L),
  col2 = c(567L, 765L, 8678L, 46L, 35L, 24L, 76L, 789L, 45L),
  col3 = c(1L, 3L, 5L, 7L, 8L, 0L, 8L, 7L, 3L),
  col4 = c("S", "S", "S", "S", "F", "F", "F", "F", "F"),
  stringsAsFactors = FALSE
)
GOGA GOGA
  • 407
  • 2
  • 7

1 Answers1

1

Instead of keeping separate vectors (col.id and col.type), you can check the class of the columns that are passed to the function. If both columns of a pair are numeric we run a Wilcoxon test; otherwise we run a Fisher test.

Also, I don't think having table(...) and prop.table(table(...)) in the data frame will work, because the data frame being created has a single row whereas table() returns output of length > 1. In addition, table(dat[[x[1]]]) and table(dat[[x[2]]]) can return outputs of different lengths, which will cause problems when constructing the data frame.

# Run one test per column pair in `combination`, separately for each GROUP.
#
# df:       data frame with a GROUP column.
# id_group: GROUP values to keep.
#
# For each pair, a Wilcoxon test is used when both columns are numeric and a
# Fisher test otherwise. Returns the per-group results bound together by
# purrr::map_df, with the group label in a "GROUP" column.
# Reads the global `combination`.
wilcox.fun <- function(df, id_group) {
  kept <- df[df$GROUP %in% id_group, ]

  # Test a single pair of columns of `dat` and return a one-row data frame.
  compare_pair <- function(dat, pair) {
    first <- dat[[pair[1]]]
    second <- dat[[pair[2]]]
    outcome <- if (is.numeric(first) && is.numeric(second)) {
      wilcox.test(first, second)
    } else {
      fisher.test(first, second)
    }
    data.frame(
      Test = sprintf('Group %s by Group %s Group', pair[1], pair[2]),
      # W = round(outcome$statistic, 4),
      p = outcome$p.value
    )
  }

  # Stack the results of every pair for one GROUP subset.
  per_group <- function(dat) {
    rows <- lapply(combination, function(pair) compare_pair(dat, pair))
    do.call(rbind, rows)
  }

  by_group <- split(kept, kept$GROUP)
  purrr::map_df(by_group, per_group, .id = "GROUP")
}

# Run the tests on the rows of `data` whose GROUP is 1 or 2.
wilcox.fun(data, c(1, 2))

If you want to explicitly pass col.type, it should have the same structure as combination and then you can use Map -

# Chunk col.type into pairs so that it lines up element-by-element with
# `combination` when both are passed to Map().
col_type_list <- list(col.type[c(1, 2)], col.type[c(3, 4)])

# Run one test per column pair in `combination`, separately for each GROUP,
# choosing the test from the declared column types rather than column classes.
#
# df:       data frame with a GROUP column.
# id_group: GROUP values to keep.
#
# `combination` and `col_type_list` are walked in parallel: a pair whose types
# are all 'num' gets a Wilcoxon test, any other pair gets a Fisher test.
# Returns the per-group results bound together by purrr::map_df, with the
# group label in a "GROUP" column.
# Reads the globals `combination` and `col_type_list`.
wilcox.fun <- function(df, id_group) {
  kept <- df[df$GROUP %in% id_group, ]

  # Test a single pair of columns of `dat` and return a one-row data frame.
  compare_pair <- function(dat, pair, pair_types) {
    first <- dat[[pair[1]]]
    second <- dat[[pair[2]]]
    outcome <- if (all(pair_types == 'num')) {
      wilcox.test(first, second)
    } else {
      fisher.test(first, second)
    }
    data.frame(
      Test = sprintf('Group %s by Group %s Group', pair[1], pair[2]),
      # W = round(outcome$statistic, 4),
      p = outcome$p.value
    )
  }

  # Stack the results of every pair for one GROUP subset.
  per_group <- function(dat) {
    rows <- Map(
      function(pair, pair_types) compare_pair(dat, pair, pair_types),
      combination,
      col_type_list
    )
    do.call(rbind, rows)
  }

  by_group <- split(kept, kept$GROUP)
  purrr::map_df(by_group, per_group, .id = "GROUP")
}
Ronak Shah
  • 377,200
  • 20
  • 156
  • 213
  • This works, but there is a small problem: columns of type "cat" can also be numeric, yet because they are categorical the Fisher test should still be applied to them – GOGA GOGA Aug 12 '21 at 03:20
  • I added an update to use `col.type` specifically for the test. – Ronak Shah Aug 12 '21 at 03:44