In dplyr
, you can use join_by
for inequality joins. Use closest
to get only the closest match:
sales <- tibble(
id = c(1L, 1L, 1L, 2L, 2L),
sale_date = as.Date(c("2018-12-31", "2019-01-02", "2019-01-05", "2019-01-04", "2019-01-01"))
)
sales
# id sale_date
# 1 1 2018-12-31
# 2 1 2019-01-02
# 3 1 2019-01-05
# 4 2 2019-01-04
# 5 2 2019-01-01
promos <- tibble(
id = c(1L, 1L, 2L),
promo_date = as.Date(c("2019-01-01", "2019-01-05", "2019-01-02"))
)
# id promo_date
# 1 1 2019-01-01
# 2 1 2019-01-05
# 3 2 2019-01-02
left_join(sales, promos, join_by(id, sale_date >= promo_date))
# id sale_date promo_date
# 1 1 2018-12-31 NA
# 2 1 2019-01-02 2019-01-01
# 3 1 2019-01-05 2019-01-01
# 4 1 2019-01-05 2019-01-05
# 5 2 2019-01-04 2019-01-02
# 6 2 2019-01-01 NA