2

I have some input strings in R. Input strings are :

  • abcde, abcdabcd, apapap

Output strings should be :

  • abcde, abcdbcde, apbqcr

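I need R code that does this. It should iterate over each string, count the occurrences of every [a-z] character as it goes, and cyclically shift each repeated character by the number of times it has already appeared: the first occurrence is left unchanged, the second is incremented by 1, the third by 2, and so on. That is, a + 1 = b, b + 2 = d, and so on. Numeric strings are to be ignored.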

dpel
  • 1,954
  • 1
  • 21
  • 31
Bullu
  • 23
  • 7
  • 4
    And what do with `z`? Please share your current code you have trouble with. – Wiktor Stribiżew Jul 12 '18 at 13:18
  • 2
    Substring length seems to be variable in your example. So why should the second string not be read as `a bc da bc d` (instead of `abcd abcd`)? Since `bc` occurs twice, the result would then become `a bc da cd d` instead of `abcd bcde`. Can you provide 100% waterproof logic to feed to an algorithm? – Wimpel Jul 12 '18 at 13:19
  • 1
    A very interesting problem, but the question is not perfectly clear yet. – Andre Elrico Jul 12 '18 at 13:52

3 Answers

1

you can do:

library(purrr);library(magrittr)

# Sample inputs, including repeated letters and the wrap-around case "zzzz".
vec <- c("abcde", "abcdabcd", "apapap", "aaaa", "zzzz")

# Lookup table: letter -> alphabet position (a = 1, ..., z = 26).
letters2counts <- setNames(1:26, letters[1:26])

# One integer vector of alphabet positions per input string, named by string.
num_vec <- vec %>%
  sapply(strsplit, split = "") %>%
  map(function(chars) unname(letters2counts[chars]))

# Per string: a two-column matrix of (alphabet position, number of occurrences).
counts <- map(num_vec, function(codes) {
  tally <- table(codes)
  cbind(as.numeric(row.names(tally)), as.numeric(tally))
})

# Shift every repeated alphabet position by the number of earlier occurrences.
#
# x: numeric vector of alphabet positions (a = 1, ..., z = 26). NA entries
#    are left as NA instead of raising an error.
# n: two-column matrix; column 1 holds an alphabet position, column 2 the
#    number of times that position occurs in `x`.
#
# Returns a numeric vector of the same length as `x` in which the k-th
# occurrence of a position p has become p + (k - 1), wrapped cyclically
# into 1..26 no matter how many times the alphabet is passed.
fun1 <- function(x, n) {
  used_inds <- integer(0)
  # unique() is evaluated once, on the untouched input, so values produced by
  # the shifting below are never picked up as new keys.
  for (i in unique(x[!is.na(x)])) {
    nn <- n[, 2][n[, 1] %in% i]
    # No (unique) count available for this position: leave it untouched.
    if (length(nn) != 1L || nn == 0) next
    # Skip slots already rewritten: a shifted value may now equal `i`.
    ind <- setdiff(which(x %in% i), used_inds)
    x[ind] <- x[ind] + seq_len(nn) - 1
    used_inds <- c(ind, used_inds)
  }
  # Modular wrap keeps results in 1..26 even after several trips past "z".
  (x - 1) %% 26 + 1
}

# Apply the shift to every string, pairing each position vector with its counts.
num_vec_calc <- Map(fun1, x = num_vec, n = counts)

# Translate the positions back to letters and glue each vector into one string.
map(num_vec_calc, function(codes) {
  paste(names(letters2counts)[codes], collapse = "")
})

result:

$abcde
[1] "abcde"

$abcdabcd
[1] "abcdbcde"

$apapap
[1] "apbqcr"

$aaaa
[1] "abcd"

$zzzz
[1] "zabc"
Andre Elrico
  • 10,956
  • 6
  • 50
  • 69
1
# Cyclically shift repeated letters of a string.
#
# x: a character string; only the first element is used.
#
# Returns a single string in which the k-th occurrence of a lowercase
# letter is advanced by k - 1 positions in the alphabet, wrapping from
# "z" back to "a". Characters outside a-z (digits, punctuation, ...) are
# copied through unchanged, and the empty string yields "".
fun <- function(x) {
  chars <- strsplit(x, "")[[1]]         # split the string into characters
  pos <- match(chars, letters)          # alphabet position, NA if not a-z
  is_letter <- !is.na(pos)

  if (any(is_letter)) {
    codes <- pos[is_letter]
    # Running count within each letter: how often it has appeared before.
    seen_before <- ave(codes, codes, FUN = seq_along) - 1L
    # Shift to 0-based, wrap with modulo, shift back to 1-based indexing.
    chars[is_letter] <- letters[(codes + seen_before - 1L) %% 26L + 1L]
  }

  paste0(chars, collapse = "")
}

# fun("a")
# [1] "a"
# fun("zzz")
# [1] "zab"
# fun("abcde")
# [1] "abcde"
# fun("abcdabcd")
# [1] "abcdbcde"
# fun("apapap")
# [1] "apbqcr"
Darren Tsai
  • 32,117
  • 5
  • 21
  • 51
1
# Cyclically shift repeated letters of a string.
#
# x: a character string; only the first element is used.
#
# Returns a single string in which the k-th occurrence of a lowercase
# letter is advanced by k - 1 positions in the alphabet ("z" wraps to "a").
# Characters that are not in `letters` are left untouched rather than
# causing a zero-length replacement error.
increment <- function(x) {
  chars <- strsplit(x, "")[[1L]]
  shifted <- chars
  # Iterate over the distinct a-z letters only. Matches are always looked up
  # in the untouched `chars`, so already-shifted slots are never revisited.
  for (l in intersect(unique(chars), letters)) {
    hits <- which(chars == l)
    start <- match(l, letters)
    # Modulo is tricky with 1-based indexing: go 0-based, wrap, then add 1.
    shifted[hits] <- letters[(start - 1L + seq_along(hits) - 1L) %% 26L + 1L]
  }
  paste(shifted, collapse = "")
}

# Input strings from the question; defined here so the call runs on its own.
x <- c("abcde", "abcdabcd", "apapap")
sapply(x, increment)

     abcde   abcdabcd     apapap 
   "abcde" "abcdbcde"   "apbqcr"

Note that a second `z` wraps around and gets incremented to `a`:

# Extra inputs covering a run of one letter and wrap-around past "z".
vec <- c("abcde", "abcdabcd", "apapap", "aaaa", "zzzzzz")
vapply(vec, increment, character(1))
     abcde   abcdabcd     apapap       aaaa     zzzzzz 
   "abcde" "abcdbcde"   "apbqcr"     "abcd"   "zabcde" 
s_baldur
  • 29,441
  • 4
  • 36
  • 69