10

I've this situation:

foo <- data.frame("vars" = c("animal: mouse | wks: 12 | site: cage | PI: 78",
                            "animal: dog | wks: 32 | GI: 0.2",
                            "animal: cat | wks: 8 | site: wild | PI: 13"))

where variable names and relative data were stored in character strings like the above example. In particular, each variable_name/its_data unit were delimited by a |. After the : there is the relative data.

I would like to have a final dataframe like this:

  animal  wks  site  PI   GI
  mouse   12   cage  78   NA
    dog   32   <NA>  NA  0.2
    cat    8   wild  13   NA
Borexino
  • 802
  • 8
  • 26

4 Answers4

8

We may use read.dcf from base R

out <- type.convert(as.data.frame(read.dcf(
    textConnection(paste(gsub("\\s+\\|\\s+", "\n", foo$vars), 
    collapse="\n\n")))), as.is = TRUE)

-output

> out
  animal wks site PI  GI
1  mouse  12 cage 78  NA
2    dog  32 <NA> NA 0.2
3    cat   8 wild 13  NA
> str(out)
'data.frame':   3 obs. of  5 variables:
 $ animal: chr  "mouse" "dog" "cat"
 $ wks   : int  12 32 8
 $ site  : chr  "cage" NA "wild"
 $ PI    : int  78 NA 13
 $ GI    : num  NA 0.2 NA
akrun
  • 874,273
  • 37
  • 540
  • 662
5

Here is a dplyr solution:

library(dplyr)
library(tidyr)

tibble(foo) %>%
  mutate(row = row_number()) %>% 
  separate_rows(vars, sep = '\\|') %>% 
  separate(vars, c("a", "b"), sep = '\\:') %>% 
  mutate(across(everything(), str_trim)) %>% 
  group_by(a) %>% 
  pivot_wider(names_from = a, values_from = b) %>% 
  type.convert(as.is = TRUE) %>% 
  select(-row)
  animal   wks site     PI    GI
  <chr>  <int> <chr> <int> <dbl>
1 mouse     12 cage     78  NA  
2 dog       32 NA       NA   0.2
3 cat        8 wild     13  NA 
TarJae
  • 72,363
  • 6
  • 19
  • 66
5

Another base R option using Reduce + merge

type.convert(
  Reduce(
    function(x, y) merge(x, y, all = TRUE),
    lapply(
      strsplit(foo$vars, ":|\\|"),
      function(x) {
        m <- matrix(trimws(x), 2)
        setNames(data.frame(m[2, , drop = FALSE]), m[1, ])
      }
    )
  ),
  as.is = TRUE
)

gives

  animal wks site PI  GI
1    cat   8 wild 13  NA
2    dog  32 <NA> NA 0.2
3  mouse  12 cage 78  NA
ThomasIsCoding
  • 96,636
  • 9
  • 24
  • 81
3

Here is a way using scan() row-wise then merge.

lapply(1:nrow(foo), \(x) 
       scan(text=foo[x, ], what=character(), sep='|', strip.white=T, qui=T) |>
  (\(.) do.call(rbind, strsplit(., ': ')))() |>
  (\(.) setNames(data.frame(t(.[, 2])), .[, 1]))()) |>
  (\(.) Reduce(\(...) merge(..., all=TRUE), .))()
#   animal wks site   PI   GI
# 1    cat   8 wild   13 <NA>
# 2    dog  32 <NA> <NA>  0.2
# 3  mouse  12 cage   78 <NA>

Note: R >= 4.1 used

jay.sf
  • 60,139
  • 8
  • 53
  • 110