0

I obtained rawp by performing Student's t-test function on dat.filtered and then I performed correction using Holm and Holm-Bonferroni methods. I have a huge increase in log2 adjusted p-value of 1.0. Is that normal?

# Eliminate probes with rowMeans less than 0 on a log2 scale
dat.fil <- subset(samp.matrix, log2(rowMeans(samp.matrix)) > 0)
removed <- nrow(samp.matrix) - nrow(dat.fil)
# Eliminate probes with rowMeans less than 3 on a log2 scale
dat.filtered <- log2(subset(dat.fil, rowMeans(dat.fil) > 3))
dat.filtered <- as.data.frame(dat.filtered)

# Student's t-test function 
rawp <- c()
ptm <- proc.time()
t.test.all.genes <- function(x,s1,s2) {
  x <- as.numeric(x)
  x1 <- x[s1]
  x2 <- x[s2]
  x1 <- as.numeric(x1)
  x2 <- as.numeric(x2)
  t.out <- t.test(x1,x2, alternative="two.sided",var.equal=T)
  out <- as.numeric(t.out$p.value)
  return(out)
}
rawp <- apply(dat.filtered, 1, t.test.all.genes, s1=is.ctl, s2=is.glioma)

# Bonferroni correction

Bonferroni <- 0.05/length(rawp) 

# Holm-Bonferroni correction
p.holm <- p.adjust(rawp, method="holm")
p.raw  <- sort(rawp)
p.holm <- sort(p.holm)
p1     <- as.matrix(p.raw)
p2     <- as.matrix(p.holm)
all.p  <- cbind(p1, p2)
colnames(all.p) <- c("Raw P-value", "Adjusted P-Value")

# Histogram illustrating the distribution of P-values 
hist(rawp, col="#1f456e", xlab="P-Value", ylab="Frequency", main="Histogram of T-Test P-Values values for GDS1962 profiles")
hist(p.holm, col="#7ef9ff", add=T)
legend("top", legend = colnames(all.p), pch = 16, col = c("#1f456e", "#7ef9ff"))

enter image description here

> dput(dat.filtered[1:5,1:5])
structure(list(NB_GSM97804 = c(11.4837654029546, 8.47167521439204, 
7.63662462054365, 10.8088033303859, 6.89845023280787), NB_GSM97805 = c(12.3853771076927, 
8.32147718222831, 7.51412226017071, 10.876516946565, 7.49345520092618
), NB_GSM97807 = c(12.2256292159381, 8.20065343636747, 7.67454539384587, 
9.98299357469431, 7.29186136958522), NB_GSM97809 = c(12.5112090703651, 
8.00842862207058, 7.53060141451683, 10.0005634427101, 7.27705487614873
), NB_GSM97811 = c(12.1185193601157, 8.3264294871223, 6.74819284958946, 
10.1469504589321, 7.88386515450979)), row.names = c("1007_s_at", 
"1053_at", "117_at", "121_at", "1255_g_at"), class = "data.frame")

> dput(samp.matrix[1:5,1:5])
structure(c(2863.9, 355, 199, 1793.8, 119.3, 5350.2, 319.9, 182.8, 
1880, 180.2, 4789.4, 294.2, 204.3, 1012, 156.7, 5837.8, 257.5, 
184.9, 1024.4, 155.1, 4446.7, 321, 107.5, 1133.8, 236.2), .Dim = c(5L, 
5L), .Dimnames = list(c("1007_s_at", "1053_at", "117_at", "121_at", 
"1255_g_at"), c("NB_GSM97804", "NB_GSM97805", "NB_GSM97807", 
"NB_GSM97809", "NB_GSM97811")))
melil
  • 81
  • 8
  • Interesting question, but it's off topic here; try stats.stackexchange.com instead. – Robert Dodier Aug 15 '21 at 18:52
  • Yes, that's normal. What `p.adjust` does with method = `"holm"` and `m` p-values is to multiply the smallest one by `m`, the next smallest by `m-1`, etc. The result is truncated to the range 0 to 1. Any p-value bigger than 0.5 (except the biggest one) will end up at 1; any bigger than 1/3 except the two biggest will end up at 1, etc. – user2554330 Aug 15 '21 at 19:40

0 Answers0