As an amendment/side note to LyzandeR's answer, here is a version that does not use the `dplyr` vocabulary, only the `magrittr` pipe. Hence, writing wrapper functions, specifying arguments, etc. may be skipped.
This is a bit more verbose than `dplyr`, but it is less verbose than `base` and allows one to use the full flexibility of any function such as `grep` or `stringi::stri_detect`, etc.
It is also significantly faster; see the benchmarks below. It should be noted, of course, that speed would have to be checked for larger examples: the overhead of dplyr is quite large for this small example, so a fair speed comparison depends on the use case.
# Toy data frame: only the column names matter for the selection below.
df <- data.frame(baa = 0, boo = 0, boa = 0, lol = 0, bAa = 0)
library(magrittr)
# Keep the columns whose name contains a "b" (case-insensitive via `(?i)`)
# that is not immediately followed by an "a". The negative lookahead `(?!a)`
# needs a Perl-compatible regex, hence `perl = TRUE`.
# `drop = FALSE` keeps the result a data frame even if only one column matches.
df %>%
  .[, grep("(?i)b(?!a)", names(.), perl = TRUE), drop = FALSE]
#   boo boa
# 1   0   0
# The following is a copy of LyzandeR's approaches.
library(dplyr)
#' Select-helper variant that matches names with a Perl-compatible regex
#'
#' Forwards its arguments to `dplyr:::grep_vars()` and forces `perl = TRUE`.
#' NOTE(review): `:::` reaches a non-exported dplyr object, so this presumably
#' depends on the installed dplyr version -- confirm before reuse.
#'
#' @param match Regular expression, passed as the first argument.
#' @param ignore.case Passed through unchanged; defaults to `TRUE`.
#' @param vars Names to search; defaults to `current_vars()`.
#' @return The value returned by `dplyr:::grep_vars()`.
matches2 <- function(match, ignore.case = TRUE, vars = current_vars()) {
  dplyr:::grep_vars(
    match,
    vars,
    ignore.case = ignore.case,
    perl = TRUE
  )
}
#' Perl-regex wrapper around `grep()`
#'
#' Calls `grep()` with `perl = TRUE` always set, so constructs such as
#' lookaheads can be used in `needle`.
#'
#' @param needle Regular expression to search for.
#' @param haystack Character vector to search in.
#' @param ... Further arguments forwarded to `grep()` (e.g. `ignore.case`,
#'   `value`).
#' @return The result of `grep()`: by default the integer indices of the
#'   matching elements of `haystack`.
grep_vars2 <- function(needle, haystack, ...) {
  grep(needle, haystack, ..., perl = TRUE)
}
#' Select-helper variant built on `grep_vars2()`
#'
#' Same interface as `matches2()`, but the matching is delegated to
#' `grep_vars2()` instead of a dplyr-internal function.
#'
#' @param match Regular expression, passed as `needle`.
#' @param ignore.case Passed through unchanged; defaults to `TRUE`.
#' @param vars Names to search, passed as `haystack`; defaults to
#'   `current_vars()`.
#' @return The value returned by `grep_vars2()`.
matches3 <- function(match, ignore.case = TRUE, vars = current_vars()) {
  grep_vars2(needle = match, haystack = vars, ignore.case = ignore.case)
}
library(microbenchmark)
# Compare the two dplyr-based helpers with the plain magrittr + grep approach.
microbenchmark(
  df %>% select(matches2("(?i)b(?!a)")),
  df %>% select(matches3("(?i)b(?!a)")),
  df %>% .[, grep("(?i)b(?!a)", names(.), perl = TRUE)]
)
# Recorded output (timings are machine-dependent):
# Unit: microseconds
#                                                   expr      min       lq      mean    median        uq       max neval
#                  df %>% select(matches2("(?i)b(?!a)")) 3994.867 4309.877 4570.6414 4555.8065 4726.9310  6618.769   100
#                  df %>% select(matches3("(?i)b(?!a)")) 3981.841 4177.834 4792.2025 4396.3275 4655.6780 31812.876   100
#  df %>% .[, grep("(?i)b(?!a)", names(.), perl = TRUE)]  183.164  210.797  242.1678  237.2455  263.6935   554.624   100