1

I have a dataset that looks like this.

 Day|Population|Red|Yellow|Orange|Green
  1       30         15   3      4      8
  2       50         10   30     5      5
  3       10          3    6     1      0
  4       25          2   10    10      3

I want to create something that looks like this

 Day|Color            |Population
  1  Green                 8
  1  Red,Orange,Yellow    22  
  2  Green                 5
  2  Red,Orange,Yellow    45
  3  Green                 0
  3  Red,Orange,Yellow    10 
  4  Green                 3
  4  Red,Orange,Yellow    22

I have something that looks like this but it doesn't work

df<- rbind(
           summarise(df,Day,Population=df$Green,Color="Green"),
           summarise(df,Day,Population=sum(df$Red,df$Yellow,df$Orange),
           Color="Red,Orange,Yellow")) 


 
user35131
  • 1,105
  • 6
  • 18

1 Answers1

0

Here is one approach using data.table

library(data.table)

colors = names(df)[3:length(names(df))]
target = "Green"
non_targets = setdiff(colors, target)

setDT(df)

rbindlist(list(
  df[, .(Day, Color = target, Population = get(target))],
  df[, .(Day, Color = paste0(non_targets, collapse="|"), Population=Population-get(target))]
))[order(Day)]

Output:

   Day             Color Population
1:   1             Green          8
2:   1 Red|Yellow|Orange         22
3:   2             Green          5
4:   2 Red|Yellow|Orange         45
5:   3             Green          0
6:   3 Red|Yellow|Orange         10
7:   4             Green          3
8:   4 Red|Yellow|Orange         22

Another approach is to pivot longer and manipulate from there. That approach is illustrated using dplyr and tidyr


colors = names(df)[3:length(names(df))]
target = "Green"
non_targets = setdiff(colors, target)

df_long = pivot_longer(df, -c(Day:Population), names_to = "Color")

bind_rows(
    df_long %>%
        filter(Color==target) %>%
        select(Day,
               Color,
               Population=value
               ),
    df_long %>%
        group_by(Day) %>%
        summarize(Population = sum(value) - value[Color==target]) %>%
        mutate(Color = paste0(non_targets,
                              collapse="|"
                              )
               )
    ) %>%
arrange(Day)

Output:

# A tibble: 8 × 3
    Day Color             Population
  <int> <chr>                  <int>
1     1 Green                      8
2     1 Red|Yellow|Orange         22
3     2 Green                      5
4     2 Red|Yellow|Orange         45
5     3 Green                      0
6     3 Red|Yellow|Orange         10
7     4 Green                      3
8     4 Red|Yellow|Orange         22
GuedesBF
  • 8,409
  • 5
  • 19
  • 37
langtang
  • 22,248
  • 1
  • 12
  • 27