Tokenizing is probably the best approach. You could make a lookup table manually like this:
# Build a long-format lookup table: one row per token, paired with its group.
# Strip "[", "]" and spaces so each pattern becomes a bare comma-separated
# list, e.g. "[aa11, aa21]" -> "aa11,aa21".
stripped <- gsub("[][ ]", "", lookup$pattern)
tokens <- strsplit(stripped, ",", fixed = TRUE)
# Repeat each group once per token of its pattern. Unlike row-binding one
# matrix per pattern, this also copes with a pattern that holds no tokens,
# and it avoids naming a variable `c`, which would mask base::c().
d <- data.frame(
  value = as.character(unlist(tokens)),
  group = rep(lookup$group, lengths(tokens))
)
d
#> value group
#> 1 aa11 group A
#> 2 aa21 group A
#> 3 aa31 group A
#> 4 aa34 group A
#> 5 x23z group B
#> 6 x22z group B
#> 7 x32z group B
#> 8 x35z group B
#> 9 x34z group B
#> 10 lg32z group C
#> 11 lg22z group C
#> 12 lg84x group C
#> 13 lg94y group C
Or you could skip tokenizing and "fuzzy join":
# Turn each bracketed list into a regex alternation,
# e.g. "[aa11, aa21]" -> "aa11|aa21".
a <- gsub("[][]", "", gsub(", ", "|", lookup$pattern, fixed = TRUE))
# Keep the alternation next to its group in a column named `a`.
lookup2 <- data.frame(lookup, a = a, check.names = FALSE)
lookup2
#> pattern group a
#> 1 [aa11, aa21, aa31, aa34] group A aa11|aa21|aa31|aa34
#> 2 [x23z, x22z, x32z, x35z, x34z] group B x23z|x22z|x32z|x35z|x34z
#> 3 [lg32z, lg22z, lg84x, lg94y] group C lg32z|lg22z|lg84x|lg94y
# Label each row of df with the group whose alternation pattern matches V1.
# Rows that match no pattern are left untouched (NA when V3 is newly created);
# if several patterns match, the group from the last matching lookup row wins.
# seq_len() keeps the loop from running when lookup2 has no rows, where
# 1:nrow(lookup2) would iterate over c(1, 0) and fail on an NA pattern.
for (i in seq_len(nrow(lookup2))) {
  matched <- grepl(pattern = lookup2$a[i], x = df$V1)
  df[matched, "V3"] <- lookup2$group[i]
}
df
#> V1 V2 V3
#> 1 [aa31, aa34] group A group A
#> 2 [lg94z] group C <NA>
#> 3 [lg84x] group C group C
#> 4 [x22z, x23z] group B group B
Created on 2021-09-22 by the reprex package (v2.0.1)
Data:
# Lookup table: each bracketed, comma-separated pattern list maps to a group.
lookup <- data.frame(
  pattern = c(
    "[aa11, aa21, aa31, aa34]",
    "[x23z, x22z, x32z, x35z, x34z]",
    "[lg32z, lg22z, lg84x, lg94y]"
  ),
  group = c("group A", "group B", "group C")
)
# Data to label: V1 holds bracketed token lists, V2 the expected group.
# Note that "[lg94z]" has no counterpart in lookup, which lists "lg94y".
df <- data.frame(
  V1 = c(
    "[aa31, aa34]",
    "[lg94z]",
    "[lg84x]",
    "[x22z, x23z]"
  ),
  V2 = c(
    "group A",
    "group C",
    "group C",
    "group B"
  )
)
Note that there is a typo in your input data (`lg94z` in `df` versus `lg94y` in `lookup`), so it returns `NA` at `df[2, "V3"]`.
I am pretty sure the loop can be vectorized; I just can't think of how at the moment. I'll update if it comes to me.