Here's a vectorized version
# Vectorized scan: y starts at 0; at every even value of x, y grows by the
# current z and z shrinks by that x. Instead of looping, compute all the
# per-even-step increments at once and cumulative-sum them.
#
# @param x Numeric/integer vector (any values; evenness of each element
#   decides whether a step occurs -- no longer assumes x == 1:n).
# @param z Starting value of the decreasing increment.
# @return Numeric vector the same length as x: the running y.
vec_fun <- function(x, z) {
  vec_z <- numeric(length(x))          # safe for length 0 and 1
  idx <- which(x %% 2 == 0)            # positions where a step occurs
  # Increment added at the k-th even x is z minus the sum of all
  # previous even x values: z, z - x[idx][1], z - x[idx][1] - x[idx][2], ...
  vec_z[idx] <- z - c(0, head(cumsum(x[idx]), -1))
  cumsum(vec_z)
}
The alternative versions - sapply
& tidyverse
# Iterative scan equivalent of vec_fun, kept for comparison.
#
# Fixes vs. the original: iterates over the x *parameter* (the original read
# the global df$x and ignored its argument), uses <- instead of =, replaces
# the scalar ifelse() side-effect hack with a plain if, and uses vapply()
# so the return type is always numeric (sapply() returns list() on empty x).
#
# @param x Numeric/integer vector.
# @param z Starting value of the decreasing increment.
# @return Numeric vector the same length as x: the running y.
sapply_fun <- function(x, z) {
  y <- 0
  vapply(x, function(xi) {
    if (xi %% 2 == 0) {
      # <<- targets y and z in the enclosing call frame, not globals
      y <<- y + z
      z <<- z - xi
    }
    y
  }, numeric(1))
}
# Attach the tidyverse once (the original called library(tidyverse) twice).
library(tidyverse)
# Tidyverse pipeline version. Takes the whole data frame and returns the
# running-y vector. Reads `z` from the enclosing/global environment (it is
# not a parameter), and the x[-1] - 1 step assumes df$x == 1:n -- TODO
# confirm before reusing on arbitrary x.
tidy_fun <- function(df) {
df %>%
# Keep the odd-x rows; each is followed by one even x in 1:n.
filter(x %% 2 != 0) %>%
# Successive values of z consumed at each even step: z, z - 2, z - 2 - 4, ...
# (x[-1] - 1 reconstructs the even values 2, 4, ... from the odd ones).
mutate(z = accumulate(c(z, x[-1] - 1), `-`)) %>%
# Re-attach the even-x rows; their z becomes NA.
right_join(df, by = c("x", "y")) %>%
# Shift z down one row so each even position carries the amount added there;
# remaining NA positions contribute 0 (no change to y).
mutate(z = lag(z), z = ifelse(is.na(z), 0, z)) %>%
# Running total of the per-row increments is the desired y.
mutate(y = cumsum(z)) %>%
select(-z) %>%
pluck("y")
}
Your data
# Small example input: x = 1..5 with y initialised to zero, plus start value z.
df <- data.frame(x = seq_len(5), y = rep(0, 5))
z <- 10
Let's make sure they all return the same result
# Verify all three implementations agree. Note: identical() compares exactly
# TWO objects -- a third positional argument is taken as the num.eq flag, so
# the original call never actually compared tidy_fun's result. Chain pairwise
# comparisons instead.
identical(vec_fun(df$x, z), sapply_fun(df$x, z)) &&
  identical(sapply_fun(df$x, z), tidy_fun(df))
# TRUE
Benchmark with the small dataset — sapply_fun appears to be slightly faster.
library(microbenchmark)
# unit = "relative": each row's timings are reported as multiples of the
# fastest expression (1.0 = fastest), over 100 runs.
microbenchmark(vec_fun(df$x, z), sapply_fun(df$x, z), tidy_fun(df), times=100L, unit="relative")
# Unit: relative
# expr min lq mean median uq max neval
# vec_fun(df$x, z) 1.349053 1.316664 1.256691 1.359864 1.348181 1.146733 100
# sapply_fun(df$x, z) 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 100
# tidy_fun(df) 411.409355 378.459005 168.689084 301.029545 270.519170 4.244833 100
Now with larger data.frame
# Larger input: 1000 rows and a bigger start value, to expose scaling costs.
df <- data.frame(x = seq_len(1000), y = rep(0, 1000))
z <- 1e4
Same result - yes
# Same three-way agreement check as before. identical() only compares two
# objects (the third positional argument is num.eq), so chain pairwise
# comparisons to actually include tidy_fun's result.
identical(vec_fun(df$x, z), sapply_fun(df$x, z)) &&
  identical(sapply_fun(df$x, z), tidy_fun(df))
# TRUE
Benchmark with the larger dataset — now it is obvious that vec_fun is the fastest.
library(microbenchmark)
# Same relative-unit benchmark on the 1000-row input: the vectorized version
# now dominates because its cost is a few whole-vector ops, while the sapply
# and tidyverse versions pay per-element / per-pipeline-step overhead.
microbenchmark(vec_fun(df$x, z), sapply_fun(df$x, z), tidy_fun(df), times=100L, unit="relative")
# Unit: relative
# expr min lq mean median uq max neval
# vec_fun(df$x, z) 1.00000 1.00000 1.00000 1.00000 1.00000 1.000 100
# sapply_fun(df$x, z) 42.69696 37.00708 32.19552 35.19225 27.82914 27.285 100
# tidy_fun(df) 259.87893 228.06417 201.43230 218.92552 172.45386 380.484 100