With dplyr 1.1.0 and later, you can use a non-equi `left_join()`
if you first reshape `genes_of_interest`
into a tidy format (one row per gene, with `start` and `end` columns). This approach is fast, and it stays flexible if you have other columns to join by as well.
library(dplyr, warn.conflicts = FALSE)
library(tidyr)
# Example positions to be tested against the gene intervals.
jap <- tibble(
  POS = c(
    836924, 922009, 1036959,
    141607615, 164000000, 118528028
  )
)
# Gene intervals in wide form: one column per gene, two rows per column
# (first value = interval start, second value = interval end).
genes_of_interest <- tibble(
  MGAM   = c(141607613, 141806547),
  SI     = c(164696686, 164796284),
  TREH   = c(118528026, 118550359),
  SLC2A2 = c(170714137, 170744539),
  SLC2A5 = c(9095166, 9148537),
  SLC5A1 = c(32439248, 32509016),
  TAS1R3 = c(1266660, 1270694),
  LCT    = c(136545420, 136594754)
)
# Reshape `genes_of_interest` from wide (one column per gene) into a tidy
# layout: one row per gene with `name`, `start` and `end` columns.
genes_of_interest <- genes_of_interest %>%
  # Label the two rows so they can become columns after the reshape.
  mutate(bound = c("start", "end")) %>%
  pivot_longer(cols = !bound, names_to = "name", values_to = "value") %>%
  pivot_wider(names_from = "bound", values_from = "value") %>%
  # Constant flag: rows left unmatched by a later left join will carry NA
  # here, which makes matched vs. unmatched easy to tell apart.
  mutate(match = TRUE)
genes_of_interest
#> # A tibble: 8 × 4
#>   name       start       end match
#>   <chr>      <dbl>     <dbl> <lgl>
#> 1 MGAM   141607613 141806547 TRUE
#> 2 SI     164696686 164796284 TRUE
#> 3 TREH   118528026 118550359 TRUE
#> 4 SLC2A2 170714137 170744539 TRUE
#> 5 SLC2A5   9095166   9148537 TRUE
#> 6 SLC5A1  32439248  32509016 TRUE
#> 7 TAS1R3   1266660   1270694 TRUE
#> 8 LCT    136545420 136594754 TRUE
# Range (non-equi) left join: attach to each position the gene whose
# [start, end] interval contains it, then flag whether a gene was found.
left_join(
  x = jap,
  y = genes_of_interest,
  # Inclusive interval test; same condition as between(POS, start, end).
  by = join_by(POS >= start, POS <= end),
  # Return a single match per position even if several intervals overlap.
  multiple = "any"
) %>%
  # Unmatched positions have NA in `match`; turn those into FALSE.
  mutate(match = coalesce(match, FALSE))
#> # A tibble: 6 × 5
#>         POS name      start       end match
#>       <dbl> <chr>     <dbl>     <dbl> <lgl>
#> 1    836924 <NA>         NA        NA FALSE
#> 2    922009 <NA>         NA        NA FALSE
#> 3   1036959 <NA>         NA        NA FALSE
#> 4 141607615 MGAM  141607613 141806547 TRUE
#> 5 164000000 <NA>         NA        NA FALSE
#> 6 118528028 TREH  118528026 118550359 TRUE