2

I have this df where I pull the criteria from:

id Criteria 1 Criteria 2
1 3/1/2022 Black
2 5/2/2022 Black
3 3/1/2022 Blue

I want to use the criteria to sum amounts from the following df:

id Date Color Amount
1 3/1/2022 Black 15
2 5/2/2022 Red 10
3 3/1/2022 Blue 25
4 5/2/2022 Red 10
5 4/1/2022 Black 15
6 3/1/2022 Black 45

I want it to look like this:

id Criteria 1 Criteria 2 Amount
1 3/1/2022 Black 60
2 5/2/2022 Black 0
3 3/1/2022 Blue 25
Limey
  • 10,234
  • 2
  • 12
  • 32
rushi
  • 225
  • 1
  • 3
  • 7

6 Answers6

2

I suspect you may need to join > group_by > summarise(sum). Please share the data.

library(dplyr)

df1 %>%
    left_join(df2, by = c('Criteria1' = 'Date', 'Criteria2' = 'Color')) %>%
    group_by(Criteria1, Criteria2) %>%
    summarise(Amount = sum(Amount, na.rm = TRUE))
          
# A tibble: 3 × 3
# Groups:   Criteria1 [2]
  Criteria1 Criteria2 Amount
  <chr>     <chr>      <int>
1 3/1/2022  Black         60
2 3/1/2022  Blue          25
3 5/2/2022  Black          0
GuedesBF
  • 8,409
  • 5
  • 19
  • 37
1

The other answers are excellent, but an alternative using a combination of base R and dplyr in case it is easier to understand for some folks:

xx <- df2 %>%  # summarize the data with `Amount`
  group_by(Date, Color) %>%
  summarize(Amount = sum(Amount))

xy <- merge(df1, xx, # merge with df1
      by.x = c("Criteria1", "Criteria2"), 
      by.y = c("Date", "Color"), 
      all.x = TRUE)

xy[is.na(xy)] <- 0 # replace NAs with 0
xy <- xy[order(xy$id), c(3,1:2,4)] # sort and reorder (may be superfluous)

#   id Criteria1 Criteria2 Amount
# 1  1  3/1/2022     Black     60
# 3  2  5/2/2022     Black      0
# 2  3  3/1/2022      Blue     25

Data

df1 <- read.table(text = "id    Criteria1   Criteria2
1   3/1/2022    Black
2   5/2/2022    Black
3   3/1/2022    Blue", header = TRUE)

df2 <- read.table(text = "id    Date    Color   Amount
1   3/1/2022    Black   15
2   5/2/2022    Red 10
3   3/1/2022    Blue    25
4   5/2/2022    Red 10
5   4/1/2022    Black   15
6   3/1/2022    Black   45", header = TRUE)
jpsmith
  • 11,023
  • 5
  • 15
  • 36
1

Here is a base R solution with merge and aggregate.

merge(df1[-1], df2[-1], 
      by.x = c("Criteria1", "Criteria2"),
      by.y = c("Date", "Color"),
      all.x = TRUE) |>
  aggregate(Amount ~ Criteria1 + Criteria2, data = _, FUN = sum, na.action = na.pass, na.rm = TRUE)
#>   Criteria1 Criteria2 Amount
#> 1  3/1/2022     Black     60
#> 2  5/2/2022     Black      0
#> 3  3/1/2022      Blue     25

Created on 2023-02-23 with reprex v2.0.2


Data

df1 <- "id  Criteria1   Criteria2
1   3/1/2022    Black
2   5/2/2022    Black
3   3/1/2022    Blue"
df1 <- read.table(text = df1, header = TRUE)
df2 <- "id  Date    Color   Amount
1   3/1/2022    Black   15
2   5/2/2022    Red     10
3   3/1/2022    Blue    25
4   5/2/2022    Red     10
5   4/1/2022    Black   15
6   3/1/2022    Black   45"
df2 <- read.table(text = df2, header = TRUE)

Created on 2023-02-23 with reprex v2.0.2

Rui Barradas
  • 70,273
  • 8
  • 34
  • 66
1

Using data.table

library(data.table)
setDT(df1)[, Amount := df2[.SD, sum(Amount), 
  on = .(Date = Criteria1, Color = Criteria2), by = .EACHI]$V1]

-output

> df1[is.na(Amount), Amount  := 0]
> df1
   id Criteria1 Criteria2 Amount
1:  1  3/1/2022     Black     60
2:  2  5/2/2022     Black      0
3:  3  3/1/2022      Blue     25
akrun
  • 874,273
  • 37
  • 540
  • 662
0

Similar assumption as GuedesBF, but:

  • using id as a join var;
  • joining individually on each of Criteria #; then
  • summing all possible Amounts.

I don't get the right results, but there's no combination I found that produces your expected output ... so perhaps:

library(dplyr)
df1 %>%
  left_join(df2, by = c("id", "Criteria 1" = "Date")) %>%
  left_join(df2, by = c("id", "Criteria 2" = "Color")) %>%
  group_by(id, `Criteria 1`, `Criteria 2`) %>%
  summarize(Amount = sum(c(Amount.x, Amount.y), na.rm = TRUE)) %>%
  ungroup()
# # A tibble: 3 × 4
#      id `Criteria 1` `Criteria 2` Amount
#   <int> <chr>        <chr>         <int>
# 1     1 3/1/2022     Black            30
# 2     2 5/2/2022     Black            10
# 3     3 3/1/2022     Blue             50

If you need to join on both simultaneously, then this only differs from GuedesBF's answer by the inclusion of id:

df1 %>%
  left_join(df2, by = c("id", "Criteria 1" = "Date", "Criteria 2" = "Color")) %>%
  group_by(id, `Criteria 1`, `Criteria 2`) %>%
  summarize(Amount = sum(Amount, na.rm = TRUE)) %>%
  ungroup()
# # A tibble: 3 × 4
#      id `Criteria 1` `Criteria 2` Amount
#   <int> <chr>        <chr>         <int>
# 1     1 3/1/2022     Black            15
# 2     2 5/2/2022     Black             0
# 3     3 3/1/2022     Blue             25

Data

df1 <- structure(list(id = 1:3, "Criteria 1" = c("3/1/2022", "5/2/2022", "3/1/2022"), "Criteria 2" = c("Black", "Black", "Blue")), class = "data.frame", row.names = c(NA, -3L))
df2 <- structure(list(id = 1:6, Date = c("3/1/2022", "5/2/2022", "3/1/2022", "5/2/2022", "4/1/2022", "3/1/2022"), Color = c("Black", "Red", "Blue", "Red", "Black", "Black"), Amount = c(15L, 10L, 25L, 10L, 15L, 45L)), class = "data.frame", row.names = c(NA, -6L))
df3 <- structure(list(id = 1:3, "Criteria 1" = c("3/1/2022", "5/2/2022", "3/1/2022"), "Criteria 2" = c("Black", "Black", "Blue"), Amount = c(60L, 0L, 25L)), class = "data.frame", row.names = c(NA, -3L))
r2evans
  • 141,215
  • 6
  • 77
  • 149
  • 1
    This output is wrong, my guess is you cannot join nor group by `id`. – Rui Barradas Feb 23 '23 at 18:51
  • 2
    Yeah, it seems likely the `id` is a red herring. Meh, so many answers at this point that all dance around the same premise of join/summarize in various combinations of features/parameters. – r2evans Feb 23 '23 at 18:55
0

We can use {powerjoin}:

library(powerjoin)
power_left_join(
  df1,
  df2 |> summarize_by_keys(Amout = sum(Amount)),
  by = c("Criteria 1" = "Date", "Criteria 2" = "Color"),
  fill = 0
)
#>   id Criteria 1 Criteria 2 Amout
#> 1  1   3/1/2022      Black    60
#> 2  2   5/2/2022      Black     0
#> 3  3   3/1/2022       Blue    25

Created on 2023-03-17 with reprex v2.0.2

moodymudskipper
  • 46,417
  • 11
  • 121
  • 167