0

I want to perform student's t-test on the res.gs dataframe, where the samples are either is.mut or is.wt.

# Students' T-test without KMT2C gene
is.mut <- grepl('^[^0]', nokmt2c.res.gs$n_mutated_group1)
is.wt <- !grepl('^[^0]', nokmt2c.res.gs$n_mutated_group1)

t.test.all.genes <- function(x,s1,s2) {
  x <- as.numeric(x)
  x1 <- x[s1]
  x2 <- x[s2]
  x1 <- as.numeric(x1)
  x2 <- as.numeric(x2)
  t.out <- t.test(x1,x2, alternative="two.sided",var.equal=T)
  out <- as.numeric(t.out$p.value)
  return(out)
}
rawp <- apply(nokmt2c.res.gs, 1, t.test.all.genes, s1=is.mut, s2=is.wt)

Traceback:

Error in if (stderr < 10 * .Machine$double.eps * max(abs(mx), abs(my))) stop("data are essentially constant") : 
  missing value where TRUE/FALSE needed
In addition: Warning messages:
1: In FUN(newX[, i], ...) : NAs introduced by coercion
2: In FUN(newX[, i], ...) : NAs introduced by coercion
Called from: t.test.default(x1, x2, alternative = "two.sided", var.equal = T)

Data

    > dput(res.gs)
    structure(list(Hugo_Symbol = c("AKAP9", "AKAP9", "ERCC2", "ERCC2", 
    "HECTD1", "HECTD1", "HERC1", "HERC1", "KMT2C", "KMT2C", "MACF1", 
    "MACF1", "MROH2B", "MROH2B"), Missense_Mutation = c(9L, 9L, 9L, 
    9L, 6L, 6L, 8L, 8L, 19L, 19L, 5L, 5L, 5L, 5L), Nonsense_Mutation = c(0L, 
    0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 1L, 0L, 0L, 0L, 0L), Splice_Site = c(0L, 
    0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 1L, 1L, 1L), total = c(9L, 
    9L, 9L, 9L, 6L, 6L, 8L, 8L, 20L, 20L, 6L, 6L, 6L, 6L), MutatedSamples = c(6L, 
    6L, 9L, 9L, 6L, 6L, 6L, 6L, 8L, 8L, 6L, 6L, 6L, 6L), AlteredSamples = c(6L, 
    6L, 9L, 9L, 6L, 6L, 6L, 6L, 8L, 8L, 6L, 6L, 6L, 6L), Group1 = c("Non-Responder", 
    "Responder", "Non-Responder", "Responder", "Non-Responder", "Responder", 
    "Non-Responder", "Responder", "Non-Responder", "Responder", "Non-Responder", 
    "Responder", "Non-Responder", "Responder"), Group2 = c("Rest", 
    "Rest", "Rest", "Rest", "Rest", "Rest", "Rest", "Rest", "Rest", 
    "Rest", "Rest", "Rest", "Rest", "Rest"), n_mutated_group1 = c("0 of 25", 
    "6 of 25", "0 of 25", "9 of 25", "0 of 25", "6 of 25", "0 of 25", 
    "6 of 25", "1 of 25", "7 of 25", "0 of 25", "6 of 25", "0 of 25", 
    "6 of 25"), n_mutated_group2 = c("6 of 25", "0 of 25", "9 of 25", 
    "0 of 25", "6 of 25", "0 of 25", "6 of 25", "0 of 25", "7 of 25", 
    "1 of 25", "6 of 25", "0 of 25", "6 of 25", "0 of 25"), p_value = c(0.022289766970618, 
    0.022289766970618, 0.00163083541184905, 0.00163083541184905, 
    0.022289766970618, 0.022289766970618, 0.022289766970618, 0.022289766970618, 
    0.0487971536957187, 0.0487971536957187, 0.022289766970618, 0.022289766970618, 
    0.022289766970618, 0.022289766970618), OR = c(0, Inf, 0, Inf, 
    0, Inf, 0, Inf, 0.111488645279478, 8.96952328636894, 0, Inf, 
    0, Inf), OR_low = c(0, 1.33358819424024, 0, 2.56647319276964, 
    0, 1.33358819424024, 0, 1.33358819424024, 0.00228988507629356, 
    1.0079479819766, 0, 1.33358819424024, 0, 1.33358819424024), OR_high = c(0.749856668137133, 
    Inf, 0.38963976043749, Inf, 0.749856668137133, Inf, 0.749856668137133, 
    Inf, 0.992114690322592, 436.703138665198, 0.749856668137133, 
    Inf, 0.749856668137133, Inf), fdr = c(0.248902397838568, 0.248902397838568, 
    0.109265972593886, 0.109265972593886, 0.248902397838568, 0.248902397838568, 
    0.248902397838568, 0.248902397838568, 0.467058471087594, 0.467058471087594, 
    0.248902397838568, 0.248902397838568, 0.248902397838568, 0.248902397838568
    )), row.names = c(NA, -14L), class = "data.frame")
melolili
  • 1,237
  • 6
  • 16
  • Why are applying by rows, you will get 1 value per iteration. – user2974951 Mar 18 '22 at 11:36
  • @user2974951 what do you mean? And what is the solution? – melolili Mar 18 '22 at 11:45
  • This line `rawp <- apply(nokmt2c.res.gs, 1, t.test.all.genes, s1=is.mut, s2=is.wt)`, you are iterating over the rows of nokmt2c.res.gs, and in the function t.test.all.genes you are performing something on the columns, but the columns now will have only 1 row, which is probably not what you want. – user2974951 Mar 18 '22 at 12:52
  • How can I change the function to iterate over the columns and not the rows? – melolili Mar 18 '22 at 13:35
  • Well... all you have to do is change the 1 to 2, but I'm not sure this will work... I don't really know what you are doing. – user2974951 Mar 18 '22 at 13:39
  • Error in t.test.default(x1, x2, alternative = "two.sided", var.equal = T) : not enough 'x' observations In addition: Warning message: In FUN(newX[, i], ...) : NAs introduced by coercion – melolili Mar 18 '22 at 13:42
  • Help me understand, the vectors is.mut and is.wt contains names of columns which you want to t-test? Do these two vectors have the same length, so that you compare the first column in is.mut with the first column in is.wt, second with second and so on? Or do you want to do all pairwise comparisons? – user2974951 Mar 18 '22 at 13:44
  • Before getting into the nitty gritty of performing tests explain what data is and what hypothesis you want to test. *Perform t-test on a data frame* provides no information. – dipetkov Mar 18 '22 at 19:46

0 Answers0