R Reshape Wide To Long Using Column Stub Strings

Question

data1=data.frame("School"=c(1,1,2,2,3,3,4,4),
                 "Fund"=c(0,1,0,1,0,1,0,1),
                 "Total_A_Grade5"=c(22,20,21,24,24,26,25,22),
                 "Group1_A_Grade5"=c(10,6,6,10,9,9,9,10),
                 "Group2_A_Grade5"=c(5,9,9,8,10,8,8,6),
                 "Total_B_Grade5"=c(23,33,19,21,19,23,20,21),
                 "Group1_B_Grade5"=c(8,7,7,10,9,9,5,5),
                 "Group2_B_Grade5"=c(6,10,7,6,6,5,9,9),
                 "Total_A_Grade6"=c(18,24,16,24,26,25,16,19),
                 "Group1_A_Grade6"=c(7,7,5,9,10,9,5,7),
                 "Group2_A_Grade6"=c(5,8,6,7,10,8,8,9),
                 "Total_B_Grade6"=c(26,23,22,24,21,22,24,19),
                 "Group1_B_Grade6"=c(10,10,6,10,7,8,8,7),
                 "Group2_B_Grade6"=c(9,6,9,6,7,6,9,9),
                 "Total_A_Grade7"=c(20,19,18,25,16,21,19,26),
                 "Group1_A_Grade7"=c(9,7,7,9,7,7,5,8),
                 "Group2_A_Grade7"=c(8,5,7,9,6,5,5,9),
                 "Total_B_Grade7"=c(25,21,24,25,18,18,27,18),
                 "Group1_B_Grade7"=c(10,10,10,7,5,6,8,5),
                 "Group2_B_Grade7"=c(9,6,8,10,8,6,10,6))


data2=data.frame("School"=c(1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1),
                 "Fund"=c(0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1),
                 "Type"=c('Total','Total','Group1','Group1','Group2','Group2','Total','Total','Group1','Group1','Group2','Group2','Total','Total','Group1','Group1','Group2','Group2','Total','Total','Group1','Group1','Group2','Group2'),
                 "Class"=c('A','A','A','A','A','A','B','B','B','B','B','B','A','A','A','A','A','A','B','B','B','B','B','B'),
"Grade"=c(5,5,5,5,5,5,5,5,5,5,5,5,6,6,6,6,6,6,6,6,6,6,6,6),
"Score"=c(22,20,10,6,5,9,23,33,8,7,6,10,18,24,7,7,5,8,26,23,10,10,9,6))

I have 'data1' and want to reshape to make 'data2' which just shows example for School 1 grade 5 and 6 but I want all of data1 reshaped.

The column names of 'data1' contain rich information. For example, Group2_B_Grade6 indicated 'Type' = Group2, 'Class' = B, 'Grade' = 6. I wish to reshape 'data1' and then use these stubs separated by "_" as colnames to prepare 'data2'

data3=data.frame("School"=c(1,1,2,2,3,3,4,4),
                 "Fund"=c(0,1,0,1,0,1,0,1),
                 "Grade_5"=c(22,20,21,24,24,26,25,22),
                 "Grade_6"=c(10,6,6,10,9,9,9,10),
                 "Grade_7"=c(5,9,9,8,10,8,8,6))

@akrun i just checked and it works perfect! – bvowe Apr 06 '20 at 17:29 — bvowe, Apr 06 '20 at 17:29

score 2 · Answer 1 · answered Apr 06 '20 at 12:10

You can do this directly with pivot_longer with some regex in names_pattern.

tidyr::pivot_longer(data1, 
                    cols = -c(School, Fund), 
                    names_to = c('Type', 'Class', 'Grade'), 
                    names_pattern = '(.*?)_([A-Z])_Grade(\\d+)', 
                    values_to = 'Score')


# A tibble: 144 x 6
#   School  Fund Type   Class Grade Score
#    <dbl> <dbl> <chr>  <chr> <chr> <dbl>
# 1      1     0 Total  A     5        22
# 2      1     0 Group1 A     5        10
# 3      1     0 Group2 A     5         5
# 4      1     0 Total  B     5        23
# 5      1     0 Group1 B     5         8
# 6      1     0 Group2 B     5         6
# 7      1     0 Total  A     6        18
# 8      1     0 Group1 A     6         7
# 9      1     0 Group2 A     6         5
#10      1     0 Total  B     6        26
# … with 134 more rows

GGamba · Accepted Answer · 2020-04-06T11:26:07.347

1

Using dplyr (and tidyr):

library(dplyr)
library(tidyr)

data2 <- data1 %>% 
  pivot_longer(-c(School, Fund)) %>% 
  separate(name, into = c('Type', 'Class', 'Grade')) %>% 
  extract(Grade, 'Grade', "([0-9]+)")

data2
#> # A tibble: 144 x 6
#>    School  Fund Type   Class Grade value
#>     <dbl> <dbl> <chr>  <chr> <chr> <dbl>
#>  1      1     0 Total  A     5        22
#>  2      1     0 Group1 A     5        10
#>  3      1     0 Group2 A     5         5
#>  4      1     0 Total  B     5        23
#>  5      1     0 Group1 B     5         8
#>  6      1     0 Group2 B     5         6
#>  7      1     0 Total  A     6        18
#>  8      1     0 Group1 A     6         7
#>  9      1     0 Group2 A     6         5
#> 10      1     0 Total  B     6        26
#> # … with 134 more rows

^{Created on 2020-04-06 by the reprex package (v0.3.0)}

edited Apr 06 '20 at 11:26

answered Apr 06 '20 at 11:18

GGamba

13,140
3
38
47

thank you so much! if grade is more than 1 digit it only takes first, how can i allow it to take all digits? – bvowe Apr 06 '20 at 11:23
1

sry about that, fixed (`extract(Grade, 'Grade', "([0-9]+)")`) – GGamba Apr 06 '20 at 11:26
thanks again! i been trying to modify your code to the updated example for data3 where I wish to implement your approach in a case where i only need to make that transformation with columns with the name 'Grade' but have not been capable of doing it successfully. – bvowe Apr 06 '20 at 15:52

score 1 · Answer 3 · answered Apr 06 '20 at 16:37

We can use melt from data.table

library(data.table)
melt(setDT(data1), id.var = c('School', 'Fund'))[,
    c('Type', 'Class', 'Grade') := tstrsplit(variable, "_")][, 
     Grade := sub('Grade', '', Grade)][, variable := NULL][]
#     School Fund value   Type Class Grade
#  1:      1    0    22  Total     A     5
#  2:      1    1    20  Total     A     5
#  3:      2    0    21  Total     A     5
#  4:      2    1    24  Total     A     5
#  5:      3    0    24  Total     A     5
# ---                                     
#140:      2    1    10 Group2     B     7
#141:      3    0     8 Group2     B     7
#142:      3    1     6 Group2     B     7
#143:      4    0    10 Group2     B     7
#144:      4    1     6 Group2     B     7

R Reshape Wide To Long Using Column Stub Strings

3 Answers3