1

Suppose I have this dataframe, df:

 # Example data: one row per page view, PathID numbering the steps per user.
 UserID <- c(1, 1, 1, 5, 5)
 PathID <- c(1, 2, 3, 1, 2)
 Page <- c("home", "about", "services", "home", "pricing")
 df <- data.frame(
   UserID = UserID,
   PathID = PathID,
   Page = Page
 )

       UserID PathID  Page
    1  1      1       home
    2  1      2       about
    3  1      3       services
    4  5      1       home
    5  5      2       pricing

I am trying to add a new row at the end of each UserID group, with the next PathID and Page set to "end". I am using dplyr and part of my code is below:

# Partial pipeline: the data frame piped into group_by() is not shown here.
group_by(UserID) %>%
# Returns the existing Page values with PathID renumbered within each user;
# nothing in this call creates an additional row.
summarise(Page,
          PathID = row_number())

I would like my dataframe output to look like this:

   UserID PathID  Page
1  1      1       home
2  1      2       about
3  1      3       services
4  1      4       end
5  5      1       home
6  5      2       pricing
7  5      3       end

Any help is much appreciated. Thank you.

Darren Tsai
  • 32,117
  • 5
  • 21
  • 51
user2845095
  • 465
  • 2
  • 9

4 Answers

1

You can split the data.frame using split and use the add_row function from dplyr on each split data.frame and combine it back with do.call(rbind)

library(dplyr)

# Split df into one data frame per user, append an "end" row to each piece,
# then stack the pieces back together.
# The split is keyed on df's own UserID column (df$UserID) so the result does
# not depend on a same-named vector existing in the global environment.
do.call(rbind, lapply(split(df, df$UserID), \(x) add_row(
  x,
  UserID = unique(x$UserID),   # the single user id of this piece
  PathID = max(x$PathID) + 1,  # one past the user's last step
  Page = "end"
)))

    UserID PathID     Page
1.1      1      1     home
1.2      1      2    about
1.3      1      3 services
1.4      1      4      end
5.1      5      1     home
5.2      5      2  pricing
5.3      5      3      end
Just James
  • 1,222
  • 2
  • 7
1

A slightly more tidyverse-focused nest+unnest variation of @JustJames' answer:

library(tidyverse)
# Nest each user's rows, append an "end" row inside every nested tibble,
# then unnest back to one flat tibble.
df %>%
    group_by(UserID) %>%
    nest() %>%
    ungroup() %>%
    # After nest(), PathID lives inside the list-column `data`, so it must be
    # read from each nested tibble. Writing `PathID = max(PathID) + 1` as an
    # extra map() argument would look PathID up outside the data and hand every
    # user the same value.
    mutate(data = map(
        data,
        \(d) add_row(d, PathID = max(d$PathID) + 1, Page = "end")
    )) %>%
    unnest(data)
## A tibble: 7 x 3
#  UserID PathID Page    
#   <dbl>  <dbl> <chr>   
#1      1      1 home    
#2      1      2 about   
#3      1      3 services
#4      1      4 end     
#5      5      1 home    
#6      5      2 pricing 
#7      5      3 end     
Maurits Evers
  • 49,617
  • 4
  • 47
  • 68
1

With dplyr, you could use group_modify + add_row:

library(dplyr)

# For every user, build a one-row summary holding the next PathID and the
# "end" marker, and append it to that user's rows.
df %>%
  group_by(UserID) %>%
  group_modify(function(rows, key) {
    end_row <- summarise(rows, PathID = max(PathID) + 1, Page = "end")
    add_row(rows, end_row)
  }) %>%
  ungroup()

# # A tibble: 7 × 3
#   UserID PathID Page
#    <dbl>  <dbl> <chr>
# 1      1      1 home
# 2      1      2 about
# 3      1      3 services
# 4      1      4 end
# 5      5      1 home
# 6      5      2 pricing
# 7      5      3 end
Darren Tsai
  • 32,117
  • 5
  • 21
  • 51
1

You can also summarise() directly using cur_data():

library(tidyverse)

# Per user, take the group's rows and append an "end" row whose PathID is one
# past the group's maximum. PathID inside add_row() refers to the current
# group's column.
# Note: as of dplyr 1.1.0, cur_data() is deprecated in favour of pick(), and
# summarise() calls that return several rows per group are deprecated in
# favour of reframe().
df %>%
  group_by(UserID) %>%
  summarise(
    cur_data() %>% 
      add_row(PathID = max(PathID) + 1, Page = "end"),
    # Drop the grouping so the result is a plain, ungrouped tibble.
    .groups = "drop"
  )
#> # A tibble: 7 × 3
#>   UserID PathID Page    
#>    <dbl>  <dbl> <chr>   
#> 1      1      1 home    
#> 2      1      2 about   
#> 3      1      3 services
#> 4      1      4 end     
#> 5      5      1 home    
#> 6      5      2 pricing 
#> 7      5      3 end
Mikko Marttila
  • 10,972
  • 18
  • 31