1

I have the following data frame:

df <- data.frame(stringsAsFactors=FALSE,
              ID = c(2016070707, 2016070707, 2016070707, 2017080808, 2017080808,
                     2017080808, 2017080808),
       team_name = c("Harlequins", "Harlequins", "Harlequins", "Bristol Rugby",
                     "Bristol Rugby", "Bristol Rugby", "Bristol Rugby"),
     player_name = c("Karl Dickson", "Tim Swiel", "Alofa Alofa",
                     "Jason Woodward", "Jack Wallace", "Will Hurrell",
                     "Tusi Pisi"),
             DGC = c(7, 5, 0, 3, 0, 8, 9),
              MR = c(3, 8, 31, 25, 0, 85, 0),
              CB = c(2, 9, 1, 13, 0, 0, 0)
  )

That I would like to transform to wide version:

new_df <- data.frame(stringsAsFactors=FALSE,
                ID = c(2016070707, 2017080808),
         team_name = c("Harlequins", "Bristol Rugby"),
          player_1 = c("Karl Dickson", "Jason Woodward"),
          player_2 = c("Tim Swiel", "Jack Wallace"),
          player_3 = c("Alofa Alofa", "Will Hurrell"),
          player_4 = c(NA, "Tusi Pisi"),
             DGC_1 = c(7, 3),
             DGC_2 = c(5, 0),
             DGC_3 = c(0, 8),
             DGC_4 = c(NA, 9),
              MR_1 = c(3, 25),
              MR_2 = c(8, 0),
              MR_3 = c(31, 85),
              MR_4 = c(NA, 0),
              CB_1 = c(2, 13),
              CB_2 = c(9, 0),
              CB_3 = c(1, 0),
              CB_4 = c(NA, 0)
      )

However, as you may see the first team has 3 players and second team has 4 players. How can I transpose df to new_df and also create these extra columns like player_4, DGC_4 in case there is different number of players in the team. And the difference will be compensated by "NAs" (just like in new_df).

Chase
  • 67,710
  • 18
  • 144
  • 161
Rama
  • 25
  • 4

2 Answers2

0

Here's one solution using package data.table

df <- data.frame(stringsAsFactors=FALSE,
                 ID = c(2016070707, 2016070707, 2016070707, 2017080808, 2017080808,
                        2017080808, 2017080808),
                 team_name = c("Harlequins", "Harlequins", "Harlequins", "Bristol Rugby",
                               "Bristol Rugby", "Bristol Rugby", "Bristol Rugby"),
                 player_name = c("Karl Dickson", "Tim Swiel", "Alofa Alofa",
                                 "Jason Woodward", "Jack Wallace", "Will Hurrell",
                                 "Tusi Pisi"),
                 DGC = c(7, 5, 0, 3, 0, 8, 9),
                 MR = c(3, 8, 31, 25, 0, 85, 0),
                 CB = c(2, 9, 1, 13, 0, 0, 0)
)

library(data.table)
#convert to data.table
setDT(df)
#create a sequence from 1 to N for each ID
df[, idx := 1:.N, by = ID]
#cast wide
dcast(ID + team_name ~ idx, data = df, value.var = c("player_name", "DGC", "MR", "CB"), )
#>            ID     team_name  player_name_1 player_name_2 player_name_3
#> 1: 2016070707    Harlequins   Karl Dickson     Tim Swiel   Alofa Alofa
#> 2: 2017080808 Bristol Rugby Jason Woodward  Jack Wallace  Will Hurrell
#>    player_name_4 DGC_1 DGC_2 DGC_3 DGC_4 MR_1 MR_2 MR_3 MR_4 CB_1 CB_2
#> 1:          <NA>     7     5     0    NA    3    8   31   NA    2    9
#> 2:     Tusi Pisi     3     0     8     9   25    0   85    0   13    0
#>    CB_3 CB_4
#> 1:    1   NA
#> 2:    0    0

Created on 2019-01-27 by the reprex package (v0.2.1)

Chase
  • 67,710
  • 18
  • 144
  • 161
0

Also one tidyverse possibility:

df %>%
 rowid_to_column() %>%
 gather(var, val, -c(ID, team_name, rowid)) %>%
 arrange(rowid, ID, team_name, var) %>%
 group_by(ID, team_name) %>%
 mutate(var = paste(var, gl(n_distinct(rowid), n_distinct(var)), sep = "_")) %>%
 select(-rowid) %>%
 spread(var, val, convert = TRUE) 

           ID team_name      CB_1  CB_2  CB_3  CB_4 DGC_1 DGC_2 DGC_3 DGC_4  MR_1  MR_2
        <dbl> <chr>         <int> <int> <int> <int> <int> <int> <int> <int> <int> <int>
1 2016070707. Harlequins        2     9     1    NA     7     5     0    NA     3     8
2 2017080808. Bristol Rugby    13     0     0     0     3     0     8     9    25     0
   MR_3  MR_4 player_name_1  player_name_2 player_name_3 player_name_4
  <int> <int> <chr>          <chr>         <chr>         <chr>        
1    31    NA Karl Dickson   Tim Swiel     Alofa Alofa   <NA>         
2    85     0 Jason Woodward Jack Wallace  Will Hurrell  Tusi Pisi    

It does the job by, first, transforming the data from wide to long format. Then, arranging the data, and, finally, creating the new variable names and transforming it to the desired wide format.

tmfmnk
  • 38,881
  • 4
  • 47
  • 67