0

**Given is a df **

df = data.frame(c("28A/38A/28C/00:05/00:05/00:05","93/00:20","93B/06:26","23A/87E/00:04/00:05","1A/38A/28C/28/00:05/00:10/01:05/00:20"))

**I would like to reorder the strings and create 4 new columns*

Example 1 with 1 Code and 1 Time

"93/00:20"

Result

Col_Code1 = 93

Col_Time1 = 00:20

Example 2 with 4 Codes and 4 Times

"1A/38A/28C/28/00:05/00:10/01:05/00:20"

Result

Col_Code1 = 1A

Col_Time1 = 00:05

Col_Code2 = 38A

Col_Time2 = 00:10

Col_Code3 = 28C

Col_Time3 = 01:05

Col_Code4 = 28

Col_Time4 = 00:20

Any idea how to split the strings and create new columns based on the pattern above?

This is what i have so far, unfortunately it does not differentiate between the length of the string, hence i have time values and code values in one column.

Unsorted Result

df = c("28A/38A/28C/00:05/00:05/00:05","93/00:20","93B/06:26","23A/87E/00:04/00:05","1A/38A/28C/28/00:05/00:10/01:05/00:20")

current_df <- df %>% 
    str_split(pattern = "/",simplify = TRUE) %>%
    as_tibble() %>% 
    bind_cols()

Desired Result

df = c("28A/00:05/38A/00:05/28C/00:05","93/00:20","93B/06:26","23A/00:04/87E/00:05","1A/00:05/38A/00:10/28C/01:05/28/00:20")

desired_df <- df %>% 
  str_split(pattern = "/",simplify = TRUE) %>%
  as_tibble() %>% 
  bind_cols()

Thanks in advance

LePopp
  • 61
  • 6

3 Answers3

0

First we split and turn the results into a long format, then combine them.

ss = strsplit(as.character(df[[1]]), split = "/")
ss = lapply(ss, function(x) {
  d = data.frame(matrix(x, ncol = 2), stringsAsFactors = F)
  d = setNames(d, c("Col_Code", "Col_Time"))
  d$index = 1:nrow(d)
  d
})

dlong = dplyr::bind_rows(ss, .id = "id")
head(dlong)
#   id Col_Code Col_Time index
# 1  1      28A    00:05     1
# 2  1      38A    00:05     2
# 3  1      28C    00:05     3
# 4  2       93    00:20     1
# 5  3      93B    06:26     1
# 6  4      23A    00:04     1

I think that's a nice tidy format, and for a lot of uses you'd be better stopping there. However, here's a general way to continue if needed:

library(reshape2)
ww = list()
for (vv in c("Col_Code", "Col_Time")) {
  d1 = dcast(dlong, id ~ index, value.var = vv)
  names(d1)[-1] = paste0(vv, names(d1)[-1])
  ww[[vv]] = d1
}

result = Reduce(f = merge, ww)
head(result)
#   id Col_Code1 Col_Code2 Col_Code3 Col_Code4 Col_Time1 Col_Time2 Col_Time3 Col_Time4
# 1  1       28A       38A       28C      <NA>     00:05     00:05     00:05      <NA>
# 2  2        93      <NA>      <NA>      <NA>     00:20      <NA>      <NA>      <NA>
# 3  3       93B      <NA>      <NA>      <NA>     06:26      <NA>      <NA>      <NA>
# 4  4       23A       87E      <NA>      <NA>     00:04     00:05      <NA>      <NA>
# 5  5        1A       38A       28C        28     00:05     00:10     01:05     00:20
Gregor Thomas
  • 136,190
  • 20
  • 167
  • 294
0

Tested with your data and the friendly help of packages stringr, magrittr

lapply(df,str_split,pattern="/") %>% unlist(.,recursive=F) %>% lapply(.,function(x){split(x,rep(0:1,each=length(x)/2))}) %>% 
        lapply(.,function(x)do.call(cbind,x)) %>% Reduce(rbind,.)

Output

      0     1      
 [1,] "28A" "00:05"
 [2,] "38A" "00:05"
 [3,] "28C" "00:05"
 [4,] "93"  "00:20"
 [5,] "93B" "06:26"
 [6,] "23A" "00:04"
 [7,] "87E" "00:05"
 [8,] "1A"  "00:05"
 [9,] "38A" "00:10"
[10,] "28C" "01:05"
[11,] "28"  "00:20"
Andre Elrico
  • 10,956
  • 6
  • 50
  • 69
0

Here's using tidyverse to convert your data to long format:

library(tidyverse)

df %>%
  extract(col1, c("col1", "col2"), regex = "((?:\\w+/?)+)/((?:\\d{2}:\\d{2}/?)+)") %>%
  map_dfc(~ as.tibble(.) %>%  mutate(ID = row_number()) %>% separate_rows(1, sep = "/")) %>%
  select(ID, starts_with("value"))

Result:

# A tibble: 11 x 3
      ID value value1
   <int> <chr>  <chr>
 1     1   28A  00:05
 2     1   38A  00:05
 3     1   28C  00:05
 4     2    93  00:20
 5     3   93B  06:26
 6     4   23A  00:04
 7     4   87E  00:05
 8     5    1A  00:05
 9     5   38A  00:10
10     5   28C  01:05
11     5    28  00:20

Data:

df = data.frame(col1 = c("28A/38A/28C/00:05/00:05/00:05","93/00:20","93B/06:26",
                         "23A/87E/00:04/00:05","1A/38A/28C/28/00:05/00:10/01:05/00:20"),
                stringsAsFactors = FALSE)
acylam
  • 18,231
  • 5
  • 36
  • 45