Agreed with @shea: a reproducible example would help.
Here is one, from what I understood:
# Reproducible toy data: fix the RNG seed so every run draws the same values.
set.seed(1)
N <- 200

# Past loans, for which the actual return date was observed.
# Due dates: a start date plus a whole-day offset in 0..29.
old_data <- data.frame(
  due_date = as.Date("2019-04-01") + floor(runif(N, min = 0, max = 30))
)
# Return date = due date shifted by a rounded normal draw (mean 0, sd 5 days).
old_data[["return_date"]] <- old_data[["due_date"]] +
  round(rnorm(N, mean = 0, sd = 5))

# Loans still outstanding: only the due date is known.
current_data <- data.frame(
  due_date = as.Date("2019-05-10") + floor(runif(N, min = 0, max = 30))
)
If I understood correctly, you want to estimate the distribution of return_date
(not yet known) for the books in current_data.
Here is a solution with the convolution computed explicitly: it is not efficient, but it is easy to follow.
# Delay between the due date and the actual return, in days
# (negative = returned before the due date).
# For semantics, I renamed your borrow_length into borrow_delay
old_data$borrow_delay <- old_data$return_date - old_data$due_date

# Empirical distribution of the delay (no smoothing): one row per observed
# delay value, with its relative frequency in p_delay.
distr_delay <- as.data.frame(
  prop.table(table(delay = old_data$borrow_delay)),
  responseName = "p_delay"
)
# as.data.frame() on a table returns `delay` as a factor. Convert through
# character: as.integer() applied directly to a factor returns the internal
# level codes (1, 2, 3, ...) rather than the delays themselves, which would
# silently shift every predicted return date.
distr_delay$delay <- as.integer(as.character(distr_delay$delay))

# Counts by due date (number of currently borrowed books due on each day)
tab_volume <- as.data.frame(table(due_date = current_data$due_date))
tab_volume$due_date <- as.Date(as.character(tab_volume$due_date))

# Explicit convolution: pair every due date with every possible delay.
# The two tables share no column, so `by = NULL` requests the Cartesian product.
distr_return <- merge(tab_volume, distr_delay, by = NULL)
distr_return$return_date <- with(distr_return, due_date + delay)
# Expected number of books due on `due_date` that come back with this delay
distr_return$expected_n_returns <- with(distr_return, Freq * p_delay)
# Add up all (due_date, delay) pairs that land on the same return date
distr_return <- with(distr_return, tapply(expected_n_returns, return_date, sum))

# Reformat: tapply() returns an array named by date; turn it into a data frame
distr_return <- data.frame(
  return_date = as.Date(names(distr_return)),
  expected_n_returns = c(distr_return)
)

# Sanity check: sum of expectations is 200 (the number of books borrowed)
sum(distr_return$expected_n_returns)
with(distr_return, plot(return_date, expected_n_returns))