1

I have the following example dataframe that I need to reshape but am not able to use the reshape or cast functions to do what I need to

df
   Testee Gender UniqueIdentifier BirthYear Graph V1 V2 V3 V4
1 7685906      1   33448683-29373      1996     1  4  6  6  5
2      NA     NA                         NA     2  7  2  9  6
3      NA     NA                         NA     3 -3  4 -3 -1
4 7685910      2   33444446-47897      1997     1  8  0  3  4
5      NA     NA                         NA     2  7  9  3  2
6      NA     NA                         NA     3  1 -9  0  2

I want to transpose the row values for V1, V2, V3, and V4 for each Graph (1, 2 and 3). So I need 12 columns: Graph1.V1 Graph1.V2 Graph1.V3 Graph1.V4 Graph2.V1 Graph2.V2 etc but I'm struggling with the cast function

This is the desired output

df2
   Testee Gender UniqueIdentifier BirthYear X1.V1 X1.V2 X1.V3 X1.V4 X2.V1 X2.V2 X2.V3 X2.V4 X3.V1 X3.V2 X3.V3 X3.V4
1 7685906      1   33448683-29373      1996     4     6     6     5     7     2     9     6    -3     4    -3    -1
2 7685910      2   33444446-47897      1997     8     0     3     4     7     9     3     2     1    -9     0     2

Any help is appreciated!

1 Answers1

3

We can change the blank elements in 'UniqueIdentifier' to 'NA', use na.locf to convert the 'NA' values to the previous non-NA element for the first 4 columns by looping with lapply, and then use dcast from data.table (after converting the 'data.frame' to 'data.table' (setDT(df1))) as it can take multiple value.var columns.

df1$UniqueIdentifier[df1$UniqueIdentifier==''] <- NA
library(zoo)
df1[1:4] <- lapply(df1[1:4], na.locf)

library(data.table)
dcast(setDT(df1), Testee+Gender+UniqueIdentifier+BirthYear~Graph, 
                        value.var=c('V1', 'V2', 'V3', 'V4'))
#    Testee Gender UniqueIdentifier BirthYear V1_1 V1_2 V1_3 V2_1 V2_2 V2_3 V3_1
#1: 7685906      1   33448683-29373      1996    4    7   -3    6    2    4    6
#2: 7685910      2   33444446-47897      1997    8    7    1    0    9   -9    3
#       V3_2 V3_3 V4_1 V4_2 V4_3
# 1:    9   -3    5    6   -1
# 2:    3    0    4    2    2

Or after pre-processing with na.locf, we can use reshape from base R

 reshape(df1, idvar=c('Testee', 'Gender', 
       'UniqueIdentifier', 'BirthYear'), 
          timevar='Graph',direction='wide')
 #   Testee Gender UniqueIdentifier BirthYear V1.1 V2.1 V3.1 V4.1 V1.2 V2.2 V3.2 V4.2 V1.3 V2.3 V3.3 V4.3
 #1 7685906      1   33448683-29373      1996    4    6    6    5    7    2    9    6   -3    4   -3   -1
 #4 7685910      2   33444446-47897      1997    8    0    3    4    7    9    3    2    1   -9    0    2
akrun
  • 874,273
  • 37
  • 540
  • 662