0

I'm sending jobs to 2 remote nodes using furrr. Each node is set to create 4 workers.

I set this up using the following future::plan():

# Two-level (nested) plan: the first entry is used by the outermost furrr
# call, the second by furrr calls made from inside it.
future::plan(list(
  # Outer level: the sessions of the cluster object `cl`.
  future::tweak(future::cluster, workers = cl),
  # Inner level: 4 multisession workers.
  future::tweak(future::multisession, workers = 4)
))

If I have an input vector of length 8, and I want to execute a function on each element, and each function call takes 2 seconds, then the total time should be 2 seconds plus some overhead to process all 8 input elements (2 nodes x 4 cores each = 8 parallel processes).

My question is about how the 8 input elements are scheduled across the 2 nodes. I noticed that if I pre-chunk the 8 elements into a list of 2 vectors of length 4 each, the execution is much faster (~5s).

If, however, I pass the vector of 8 elements to the outer furrr call with furrr::furrr_options(chunk_size = 4), processing the 8 elements is much slower (~10s).

I would expect both of these methods to execute in similar time, so there must be something I don't understand about how futures are distributed to the nodes.

Is there extra overhead when using chunk_size? What is different about the two methods shown below?

#### 1) Pre-chunk using list input ----

# Open one R session on each remote host over SSH; these two sessions are
# the outer (cluster-level) workers.
ssh_private_key_file <- "C:/Users/user/.ssh/id_rsa"
worker_nodes <- c("node1", "node2")

cl <- future::makeClusterPSOCK(
  worker_nodes,
  user = "user",
  rshopts = c(
    "-o", "StrictHostKeyChecking=no",
    "-o", "IdentitiesOnly=yes",
    "-i", ssh_private_key_file
  ),
  rscript = "/usr/bin/Rscript",
  homogeneous = FALSE,
  tries = 5
)

# Nested plan: the outer map is spread over the cluster sessions, and any
# map called from inside it uses 4 multisession workers.
future::plan(list(
  future::tweak(future::cluster, workers = cl),
  future::tweak(future::multisession, workers = 4)
))

# The input is already split into two length-4 vectors, so each outer call
# receives a whole vector and hands all 4 elements to the inner map at once.
start <- proc.time()
result <- furrr::future_map(
  .x = list(x = 1:4, y = 1:4),
  .f = function(chunk) {
    furrr::future_map_chr(
      .x = chunk,
      .f = function(item) {
        Sys.sleep(2)
        ""
      }
    )
  }
)
proc.time() - start
#>    user  system elapsed 
#>    0.04    0.03    4.75

# Return to sequential evaluation before tearing the cluster down.
future::plan("sequential")

# Shut down the remote R sessions.
parallel::stopCluster(cl)



#### 2) Define chunk size using furrr options ----

# Open one R session on each remote host over SSH; these two sessions are
# the outer (cluster-level) workers.
ssh_private_key_file <- "C:/Users/user/.ssh/id_rsa"
worker_nodes <- c("node1", "node2")

cl <- future::makeClusterPSOCK(
  worker_nodes,
  user = "user",
  rshopts = c(
    "-o", "StrictHostKeyChecking=no",
    "-o", "IdentitiesOnly=yes",
    "-i", ssh_private_key_file
  ),
  rscript = "/usr/bin/Rscript",
  homogeneous = FALSE,
  tries = 5
)

# Nested plan: the outer map is spread over the cluster sessions, and any
# map called from inside it uses 4 multisession workers.
future::plan(list(
  future::tweak(future::cluster, workers = cl),
  future::tweak(future::multisession, workers = 4)
))

# chunk_size = 4 bundles the 8 elements into 2 futures of 4 elements each,
# but the outer function is still invoked once per element. Each invocation
# therefore sees a single integer, so the inner map only ever has one
# element to work on. The 4 elements assigned to a future are processed one
# after another, i.e. roughly 4 x 2 s per node plus overhead, which is in
# line with the ~10 s recorded below.
start <- proc.time()
result <- furrr::future_map(
  .x = 1:8,
  .f = function(element) {
    furrr::future_map_chr(
      .x = element,
      .f = function(item) {
        Sys.sleep(2)
        ""
      }
    )
  },
  .options = furrr::furrr_options(chunk_size = 4)
)
proc.time() - start
#>    user  system elapsed 
#>    0.00    0.00    9.88


# Return to sequential evaluation before tearing the cluster down.
future::plan("sequential")

# Shut down the remote R sessions.
parallel::stopCluster(cl)

Created on 2023-03-26 with reprex v2.0.2

Giovanni Colitti
  • 1,982
  • 11
  • 24

0 Answers0