2

Note: this is somewhat related to a question I have previously asked here

Here is a subset of what my data looks like, as an example:

library(dplyr)

DF <- structure(list(id = c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 
13, 14, 15, 16, 17, 18, 19), day = c("day1", "day2", "day3", 
"day4", "day5", "day6", "day6", "day7", "day8", "day9", "day10", 
"day10", "day11", "day12", "day13", "day14", "day14", "day14", 
"day14"), sent_to = c(NA, NA, "Blue Superstore", "Garden Cinema", 
"Pasta House", NA, NA, "Pizzaria", NA, "Ice Palace", NA, NA, 
"Shoes Centre", "Dreams Dessert", NA, "Chicken World", "Art Gallery", 
"Smoothie Hut", NA), received_from = c("ATM", "Sarah", NA, NA, 
NA, "Jane", "Joe", NA, "Sarah", NA, "Anna", "Jane", NA, NA, "Anna", 
NA, NA, NA, "Joe"), reference = c("add_cash", "gift", "shopping", 
"cinema_tickets", "meal", "reimbursed", "reimbursed", "meal", 
"reimbursed", "ice_rink_tickets", "reimbursed", "reimbursed", 
"shoes", "ice_cream", "reimbursed", "meal", "gallery_ticket", 
"drink", "reimbursed"), decrease = c(0, 0, 15.2, 10.8, 12.5, 
0, 0, 10, 0, 18, 0, 0, 15, 6.5, 0, 8, 3.5, 2, 0), increase = c(50, 
30, 0, 0, 0, 5.4, 7.25, 0, 10, 0, 6, 6, 0, 0, 21.5, 0, 0, 0, 
13.5), reimbursed_id = c(NA, NA, NA, "R", "R", "4", "5", "R", 
"8", "R", "10", "10", "R", "R", "13, 14", "R", "R", "R", "16, 17, 18"
), change = c(50, 30, -15.2, -10.8, -12.5, 5.4, 7.25, -10, 10, 
-18, 6, 6, -15, -6.5, 21.5, -8, -3.5, -2, 13.5), balance = c(50, 
80, 64.8, 54, 41.5, 46.9, 54.15, 44.15, 54.15, 36.15, 42.15, 
48.15, 33.15, 26.65, 48.15, 40.15, 36.65, 34.65, 48.15)), row.names = c(NA, 
-19L), class = c("tbl_df", "tbl", "data.frame"))
> DF
# A tibble: 19 × 10
      id day   sent_to         received_from reference        decrease increase reimbursed_id change balance
   <dbl> <chr> <chr>           <chr>         <chr>               <dbl>    <dbl> <chr>          <dbl>   <dbl>
 1     1 day1  NA              ATM           add_cash              0      50    NA             50       50  
 2     2 day2  NA              Sarah         gift                  0      30    NA             30       80  
 3     3 day3  Blue Superstore NA            shopping             15.2     0    NA            -15.2     64.8
 4     4 day4  Garden Cinema   NA            cinema_tickets       10.8     0    R             -10.8     54  
 5     5 day5  Pasta House     NA            meal                 12.5     0    R             -12.5     41.5
 6     6 day6  NA              Jane          reimbursed            0       5.4  4               5.4     46.9
 7     7 day6  NA              Joe           reimbursed            0       7.25 5               7.25    54.2
 8     8 day7  Pizzaria        NA            meal                 10       0    R             -10       44.2
 9     9 day8  NA              Sarah         reimbursed            0      10    8              10       54.2
10    10 day9  Ice Palace      NA            ice_rink_tickets     18       0    R             -18       36.2
11    11 day10 NA              Anna          reimbursed            0       6    10              6       42.2
12    12 day10 NA              Jane          reimbursed            0       6    10              6       48.2
13    13 day11 Shoes Centre    NA            shoes                15       0    R             -15       33.2
14    14 day12 Dreams Dessert  NA            ice_cream             6.5     0    R              -6.5     26.6
15    15 day13 NA              Anna          reimbursed            0      21.5  13, 14         21.5     48.2
16    16 day14 Chicken World   NA            meal                  8       0    R              -8       40.2
17    17 day14 Art Gallery     NA            gallery_ticket        3.5     0    R              -3.5     36.6
18    18 day14 Smoothie Hut    NA            drink                 2       0    R              -2       34.6
19    19 day14 NA              Joe           reimbursed            0      13.5  16, 17, 18     13.5     48.2

Explanation for reimbursed_id column:

  • R indicates the value in the decrease column is not representative of the user's actual spending because it includes the amount paid on someone's behalf
  • 4 (or any number) represents the id for which the user was reimbursed (returned the borrowed amount)
  • 13, 14 (or a list of comma-separated numbers) represents the id for which the user was reimbursed, but across multiple transactions

Desired outcome:

I would like to add an actual_decrease column to this dataset which essentially looks through the reimbursed_id column, notes the IDs affecting other rows, collects the reimbursed amount in the increase column for said rows, and subtracts it from the values in the decrease for the respective ID's.

Further details:

Please refer to the following image (contains what I'd like the actual_decrease column to look like): table with actual decrease and computation type

As you can see in the screenshot, there are a few different types of computation that have been applied to each row, depending on the contents of the reimbursed_id column.

If 'R' tagged, the computation for actual_decrease would depend on whether the reimbursement is for:

  • the total amount (one ID)
  • a portion of the total amount (one ID)
  • the total amount, but through multiple transactions (multiple IDs)
  • a portion of the total amount, but through multiple transactions (multiple IDs)

If not 'R' tagged then the computation for actual_decrease would just be the value in decrease.


So far, I only have the following (based on one of my previously asked questions):

DF %>%
  left_join(DF %>%
              filter(reference == "reimbursed") %>%
              group_by(id = as.numeric(reimbursed_id)) %>% # removes row 15 and 19 (contains comma-separated values)
              summarise(actual_decrease = sum(increase)),
            by = "id") %>%
  mutate(actual_decrease = ifelse(!is.na(actual_decrease),
                                  decrease - actual_decrease,
                                  decrease))

# A tibble: 19 × 11
      id day   sent_to         received_from reference        decrease increase reimbursed_id change balance actual_decrease
   <dbl> <chr> <chr>           <chr>         <chr>               <dbl>    <dbl> <chr>          <dbl>   <dbl>           <dbl>
 1     1 day1  NA              ATM           add_cash              0      50    NA             50       50              0   
 2     2 day2  NA              Sarah         gift                  0      30    NA             30       80              0   
 3     3 day3  Blue Superstore NA            shopping             15.2     0    NA            -15.2     64.8           15.2 
 4     4 day4  Garden Cinema   NA            cinema_tickets       10.8     0    R             -10.8     54              5.4 
 5     5 day5  Pasta House     NA            meal                 12.5     0    R             -12.5     41.5            5.25
 6     6 day6  NA              Jane          reimbursed            0       5.4  4               5.4     46.9            0   
 7     7 day6  NA              Joe           reimbursed            0       7.25 5               7.25    54.2            0   
 8     8 day7  Pizzaria        NA            meal                 10       0    R             -10       44.2            0   
 9     9 day8  NA              Sarah         reimbursed            0      10    8              10       54.2            0   
10    10 day9  Ice Palace      NA            ice_rink_tickets     18       0    R             -18       36.2            6   
11    11 day10 NA              Anna          reimbursed            0       6    10              6       42.2            0   
12    12 day10 NA              Jane          reimbursed            0       6    10              6       48.2            0   
13    13 day11 Shoes Centre    NA            shoes                15       0    R             -15       33.2           15   
14    14 day12 Dreams Dessert  NA            ice_cream             6.5     0    R              -6.5     26.6            6.5 
15    15 day13 NA              Anna          reimbursed            0      21.5  13, 14         21.5     48.2            0   
16    16 day14 Chicken World   NA            meal                  8       0    R              -8       40.2            8   
17    17 day14 Art Gallery     NA            gallery_ticket        3.5     0    R              -3.5     36.6            3.5 
18    18 day14 Smoothie Hut    NA            drink                 2       0    R              -2       34.6            2   
19    19 day14 NA              Joe           reimbursed            0      13.5  16, 17, 18     13.5     48.2            0   

But this code does not display my desired output for the actual_decrease column for all computation types - namely, it is incorrect for row 13 onwards.


Since, my actual dataset is quite large I'd prefer to avoid using loops.

Amy help is much appreciated :)


EDIT: This is what I'd like the dataset to look like:

# A tibble: 19 × 9
      id day   sent_to         received_from reference        decrease increase reimbursed_id actual_decrease
   <dbl> <chr> <chr>           <chr>         <chr>               <dbl>    <dbl> <chr>                   <dbl>
 1     1 day1  NA              ATM           add_cash              0      50    NA                       0   
 2     2 day2  NA              Sarah         gift                  0      30    NA                       0   
 3     3 day3  Blue Superstore NA            shopping             15.2     0    NA                      15.2 
 4     4 day4  Garden Cinema   NA            cinema_tickets       10.8     0    R                        5.4 
 5     5 day5  Pasta House     NA            meal                 12.5     0    R                        5.25
 6     6 day6  NA              Jane          reimbursed            0       5.4  4                        0   
 7     7 day6  NA              Joe           reimbursed            0       7.25 5                        0   
 8     8 day7  Pizzaria        NA            meal                 10       0    R                        0   
 9     9 day8  NA              Sarah         reimbursed            0      10    8                        0   
10    10 day9  Ice Palace      NA            ice_rink_tickets     18       0    R                        6   
11    11 day10 NA              Anna          reimbursed            0       6    10                       0   
12    12 day10 NA              Jane          reimbursed            0       6    10                       0   
13    13 day11 Shoes Centre    NA            shoes                15       0    R                        0   
14    14 day12 Dreams Dessert  NA            ice_cream             6.5     0    R                        0   
15    15 day13 NA              Anna          reimbursed            0      21.5  13, 14                   0   
16    16 day14 Chicken World   NA            meal                  8       0    R                        0   
17    17 day14 Art Gallery     NA            gallery_ticket        3.5     0    R                        0   
18    18 day14 Smoothie Hut    NA            drink                 2       0    R                        0   
19    19 day14 NA              Joe           reimbursed            0      13.5  16, 17, 18               0 
kiwi
  • 565
  • 3
  • 11
  • how should actual_decrease look like? – TarJae Apr 12 '23 at 19:15
  • Hi! I was wondering if you could provide more context on what is going on with this question. Are we looking at the transactions of one person, a company, or a group of people? I ask because there are so many empty values in the sent_to and received_from columns. – Mark Aug 14 '23 at 11:52

0 Answers0