0

I have two rather large data files which I need to merge into one, in the following way:

A  <- tibble(
    id=1:2,
    firstName=c("Alice", "Bob")
)

B  <- tibble(
    id=c(1,1,2),
    email=c("alice@wonder.land.com", "alice2@wonderland.com", "bob@builder.com")
)

desiredResult  <- tibble(
    id=1:2,
    firstName=c("Alice", "Bob"),
    email1=c("alice@wonderland.com", "bob@builder.com"),
    email2=c("alice2@wonderland.com", NA)
) 

How can this be done efficiently? I tried using spread() but did not succeed and could only hack together a bad solution:

notGood  <-
    inner_join(A, B, by = "id") %>%
    split(., .$id) %>%
    map_dfr(function(x) as.tibble(t(unlist(x)))) %>%
    replace(is.na(.), "") %>%
    unite(id, id1, id, sep = "") %>%
    unite(firstName, firstName1, firstName, sep = "") %>%
    unite(email, email1, email, sep = "") %>%
    select(id, firstName, matches("email"))

EDIT:

The suggested solutions work great, but how could I apply them to more than one column? Like in this example:

A  <- tibble(
    id=1:2,
    firstName=c("Alice", "Bob")
)

B  <- tibble(
    id=c(1,1,2),
    email=c("alice@wonder.land.com", "alice2@wonderland.com", "bob@builder.com"),
    phone=c("123", "456", "789")
)

desiredResult  <- tibble(
    id=1:2,
    firstName=c("Alice", "Bob"),
    email1=c("alice@wonderland.com", "bob@builder.com"),
    email2=c("alice2@wonderland.com", NA),
    phone1=c("123", "789"),
    phone2=c("456", NA)
)

Simply adding more column names to the suggested answers doesn't quite work:

A %>%
    left_join(B, by='id') %>%
    group_by(id)%>%
    mutate(rn=paste0('email',row_number())) %>%
    mutate(rn2=paste0('phone',row_number())) %>%
    spread(rn, email) %>%
    spread(rn2, phone)
Florian
  • 147
  • 3
  • 7

2 Answers2

2

Check this solution:

B %>%
  group_by(id) %>%
  mutate(rn = paste0('email', row_number())) %>%
  spread(rn, email) %>%
  right_join(A) %>%
  select(id, firstName, everything())

Answer to added question:

A %>%
  left_join(
    B %>%
      gather(key, val, -id) %>%
      group_by(id, key) %>%
      mutate(key2 = paste0(key, row_number())) %>%
      ungroup() %>%
      select(-key) %>%
      spread(key2, val)
  )
Paweł Chabros
  • 2,349
  • 1
  • 9
  • 12
  • 2
    good idea. If you do it like this: `A %>% left_join(B, by='id') %>% group_by(id) %>% mutate(rn=paste0('email',row_number())) %>% spread(rn, email)` you can skip `select` to get them in right order. Cheers. – M-- Feb 13 '19 at 15:29
  • Thanks, this works great! Do you also have an idea how to apply this to more than one column? Like in the edit to the question. – Florian Feb 15 '19 at 16:18
  • 1
    @Florian I've eddited ma answer. Check if this is what you are looking for. – Paweł Chabros Feb 15 '19 at 17:23
1
desiredResult <-
  A %>% 
  inner_join(B %>% 
               group_by(id) %>% 
               mutate(ColName = paste0("email",row_number())) %>% 
               ungroup() %>% 
               spread(ColName, email), by = "id")
Kerry Jackson
  • 1,821
  • 12
  • 20