# base R
# Per-group empirical-CDF value of every column named in `cols`, stored in
# new "<col>_q" columns; rows are grouped by `dat$lab`.
cols <- c("value1", "value2")
# Index with dat[cols] rather than dat[, cols]: the latter drops to a bare
# vector when `cols` has length one, and the inner lapply() would then walk
# individual values instead of columns.
dat[paste0(cols, "_q")] <- ave(
  dat[cols],
  dat$lab,
  # FUN receives the rows of one group as a data frame; ecdf(y)(y) is the
  # share of that group's values that are <= each value.
  FUN = function(z) lapply(z, function(y) ecdf(y)(y))
)
dat
# lab value1 value2 value1_q value2_q
# 1 wbc 7.0 6 0.7500000 1.0000000
# 2 wbc 6.5 3 0.5000000 0.7500000
# 3 rbc 3.5 2 0.3333333 1.0000000
# 4 rbc 4.0 2 0.6666667 1.0000000
# 5 plt 100.0 1 0.5000000 0.5000000
# 6 plt 120.0 2 1.0000000 1.0000000
# 7 wbc 5.0 2 0.2500000 0.5000000
# 8 wbc 7.5 1 1.0000000 0.2500000
# 9 rbc 4.1 0 1.0000000 0.3333333
# dplyr
library(dplyr)
# Within each `lab` group, add "<col>_quant" columns holding the empirical-CDF
# value of every entry of value1 and value2.
dat %>%
  group_by(lab) %>%
  # across() replaces the superseded mutate_at(); a named list of functions
  # yields output columns named "<col>_<fn>" by default.
  mutate(across(c(value1, value2), list(quant = ~ ecdf(.x)(.x)))) %>%
  ungroup()
# # A tibble: 9 x 5
# lab value1 value2 value1_quant value2_quant
# <chr> <dbl> <int> <dbl> <dbl>
# 1 wbc 7 6 0.75 1
# 2 wbc 6.5 3 0.5 0.75
# 3 rbc 3.5 2 0.333 1
# 4 rbc 4 2 0.667 1
# 5 plt 100 1 0.5 0.5
# 6 plt 120 2 1 1
# 7 wbc 5 2 0.25 0.5
# 8 wbc 7.5 1 1 0.25
# 9 rbc 4.1 0 1 0.333
# data.table
library(data.table)
# Columns whose within-group empirical-CDF values are wanted.
cols <- c("value1", "value2")
# Work on a data.table copy so the columns can be added by reference.
datDT <- as.data.table(dat)
# For each `lab` group, evaluate every selected column's ECDF at its own
# values and store the results in "<col>_q" columns.
datDT[
  ,
  (paste0(cols, "_q")) := lapply(.SD, function(v) {
    group_ecdf <- ecdf(v)
    group_ecdf(v)
  }),
  by = "lab",
  .SDcols = cols
]
datDT
# lab value1 value2 value1_q value2_q
# 1: wbc 7.0 6 0.7500000 1.0000000
# 2: wbc 6.5 3 0.5000000 0.7500000
# 3: rbc 3.5 2 0.3333333 1.0000000
# 4: rbc 4.0 2 0.6666667 1.0000000
# 5: plt 100.0 1 0.5000000 0.5000000
# 6: plt 120.0 2 1.0000000 1.0000000
# 7: wbc 5.0 2 0.2500000 0.5000000
# 8: wbc 7.5 1 1.0000000 0.2500000
# 9: rbc 4.1 0 1.0000000 0.3333333