0

The code below works fine when the function names are passed in for the non-parallel version OR if the function name is hard coded in the parallel version. What changes do I need to make so that I pass in the the function name(s) for the parallel version (called with bpar set to TRUE)?

require(doParallel)

MyFunc <- function(fname, bpar) {
    fname1 <- function(n) { data.frame(a=rep(3,n), b=rep(4,n)) }
    fname2 <- function(n) { data.frame(a=rep(1,n), b=rep(2,n)) }

    nobs <- 100
    if(bpar) {
        incr <- ceiling(nobs/getDoParWorkers())
        out.dt <- foreach(ind=seq(1,nobs,incr), .combine=rbind, .multicombine=TRUE) %dopar% {
            do.call('rbind', lapply(seq(ind,min(ind+incr-1,nobs)), fname ))
        }
    } else {
        out.dt <- do.call('rbind', lapply(1:nobs, fname))
    }
}

df1 <- MyFunc("fname1", FALSE)  #works
df2 <- MyFunc("fname2", FALSE)  #works

cl <- makeCluster(3)
registerDoParallel(cl)
df3 <- MyFunc("fname1", TRUE)   #fails
df4 <- MyFunc("fname2", TRUE)   #fails
stopCluster(cl)

The error I get is:

Error in { : 
  task 1 failed - "object 'fname1' of mode 'function' was not found"
Error in { : 
  task 1 failed - "object 'fname2' of mode 'function' was not found"
ironv
  • 978
  • 10
  • 25
  • 3
    add `.export=fname` explicitly to your `foreach` loop as `foreach(ind=seq(1,nobs,incr), .combine=rbind, .multicombine=TRUE, .export=fname)` – Khashaa Mar 20 '15 at 04:56
  • Additionally, I suggest you change the `do.call('rbind')` call to `rbindlist()` of the `data.table` package (much faster): http://www.inside-r.org/packages/cran/data.table/docs/rbindlist – desertnaut Mar 20 '15 at 09:05

0 Answers0