0

The code below works fine; however, I would like to run it in parallel. I have tried different plans with future and future.apply but couldn't get it to work. Any help is appreciated. I am running Windows on a machine with 8 cores.

library(mlr3verse)
library(future.apply)
#> Warning: package 'future.apply' was built under R version 3.6.3
#> Loading required package: future
#> Warning: package 'future' was built under R version 3.6.3
library(future)
# Request forked parallel workers for everything that goes through future.
# NOTE(review): the multicore plan relies on process forking, which is not
# supported on Windows; on that OS future falls back to sequential processing.
future::plan(multicore)

# Task: the built-in "sonar" classification task, stratified on its target so
# that every resampling split keeps the class proportions.
tsk_clf <- tsk("sonar")
tsk_clf$col_roles$stratum <- tsk_clf$target_names

# Level-1 base learners and the level-2 meta learner, all predicting labels.
lda <- lrn("classif.lda", predict_type = "response")
svm <- lrn(
  "classif.svm",
  type = "C-classification",
  kernel = "radial",
  predict_type = "response"
)
xgb <- lrn("classif.xgboost", predict_type = "response")
ranger_lrn <- lrn(
  "classif.ranger",
  predict_type = "response",
  importance = "permutation"
)

# Level 1: the three base learners side by side, each wrapped in a
# PipeOpLearnerCV so it is resampled internally during training.
base_learner_ops <- list(
  PipeOpLearnerCV$new(lda, id = "lda_cv_l1"),
  PipeOpLearnerCV$new(svm, id = "svm_cv_l1"),
  PipeOpLearnerCV$new(xgb, id = "xgb_cv_l1")
)
level_1 <- gunion(base_learner_ops)

# Level 2: merge the three level-1 outputs into one feature set and feed it
# to the ranger learner.
feature_union <- PipeOpFeatureUnion$new(3, id = "u2")
meta_learner <- PipeOpLearner$new(ranger_lrn, id = "ranger_l2")
level_2 <- level_1 %>>% feature_union %>>% meta_learner

# Wrap the whole graph as a single learner, then train, predict and score on
# the same task (so the printed score is a resubstitution error).
lrn <- GraphLearner$new(level_2)
lrn$train(tsk_clf)$predict(tsk_clf)$score()
#> INFO  [17:04:06.984] Applying learner 'classif.lda' on task 'sonar' (iter 3/3) 
#> INFO  [17:04:07.052] Applying learner 'classif.lda' on task 'sonar' (iter 1/3) 
#> INFO  [17:04:07.097] Applying learner 'classif.lda' on task 'sonar' (iter 2/3) 
#> INFO  [17:04:07.340] Applying learner 'classif.svm' on task 'sonar' (iter 1/3) 
#> INFO  [17:04:07.382] Applying learner 'classif.svm' on task 'sonar' (iter 2/3) 
#> INFO  [17:04:07.430] Applying learner 'classif.svm' on task 'sonar' (iter 3/3) 
#> INFO  [17:04:08.627] Applying learner 'classif.xgboost' on task 'sonar' (iter 3/3) 
#> INFO  [17:04:08.672] Applying learner 'classif.xgboost' on task 'sonar' (iter 2/3) 
#> INFO  [17:04:08.715] Applying learner 'classif.xgboost' on task 'sonar' (iter 1/3)
#> classif.ce 
#> 0.01923077

Created on 2020-12-15 by the reprex package (v0.3.0)

devtools::session_info()
#> - Session info ----------------------------------------------------------
#>  setting  value                       
#>  version  R version 3.6.1 (2019-07-05)
#>  os       Windows 10 x64              
#>  system   x86_64, mingw32             
#>  ui       RTerm                       
#>  language (EN)                        
#>  collate  English_United States.1252  
#>  ctype    English_United States.1252  
#>  tz       Europe/Berlin               
#>  date     2020-12-15                  
#> 
#> - Packages --------------------------------------------------------------
#>  package       * version date       lib source        
#>  assertthat      0.2.1   2019-03-21 [1] CRAN (R 3.6.0)
#>  backports       1.1.4   2019-04-10 [1] CRAN (R 3.6.0)
#>  bbotk           0.2.0   2020-07-24 [1] CRAN (R 3.6.3)
#>  callr           3.5.1   2020-10-13 [1] CRAN (R 3.6.3)
#>  checkmate       2.0.0   2020-02-06 [1] CRAN (R 3.6.3)
#>  class           7.3-17  2020-04-26 [1] CRAN (R 3.6.3)
#>  cli             2.1.0   2020-10-12 [1] CRAN (R 3.6.3)
#>  codetools       0.2-16  2018-12-24 [1] CRAN (R 3.6.0)
#>  colorspace      1.4-1   2019-03-18 [1] CRAN (R 3.6.3)
#>  crayon          1.3.4   2017-09-16 [1] CRAN (R 3.6.0)
#>  data.table      1.13.0  2020-07-24 [1] CRAN (R 3.6.3)
#>  desc            1.2.0   2018-05-01 [1] CRAN (R 3.6.3)
#>  devtools        2.3.2   2020-09-18 [1] CRAN (R 3.6.3)
#>  digest          0.6.18  2018-10-10 [1] CRAN (R 3.6.0)
#>  dplyr           1.0.2   2020-08-18 [1] CRAN (R 3.6.3)
#>  e1071           1.7-3   2019-11-26 [1] CRAN (R 3.6.3)
#>  ellipsis        0.3.1   2020-05-15 [1] CRAN (R 3.6.3)
#>  evaluate        0.13    2019-02-12 [1] CRAN (R 3.6.0)
#>  fansi           0.4.0   2018-10-05 [1] CRAN (R 3.6.0)
#>  fs              1.5.0   2020-07-31 [1] CRAN (R 3.6.3)
#>  future        * 1.18.0  2020-07-09 [1] CRAN (R 3.6.3)
#>  future.apply  * 1.6.0   2020-07-01 [1] CRAN (R 3.6.3)
#>  generics        0.1.0   2020-10-31 [1] CRAN (R 3.6.3)
#>  ggplot2         3.3.2   2020-06-19 [1] CRAN (R 3.6.3)
#>  globals         0.12.5  2019-12-07 [1] CRAN (R 3.6.1)
#>  glue            1.4.2   2020-08-27 [1] CRAN (R 3.6.3)
#>  gtable          0.3.0   2019-03-25 [1] CRAN (R 3.6.3)
#>  highr           0.8     2019-03-20 [1] CRAN (R 3.6.0)
#>  htmltools       0.3.6   2017-04-28 [1] CRAN (R 3.6.0)
#>  knitr           1.22    2019-03-08 [1] CRAN (R 3.6.0)
#>  lattice         0.20-41 2020-04-02 [1] CRAN (R 3.6.3)
#>  lgr             0.3.4   2020-03-20 [1] CRAN (R 3.6.3)
#>  lifecycle       0.2.0   2020-03-06 [1] CRAN (R 3.6.3)
#>  listenv         0.8.0   2019-12-05 [1] CRAN (R 3.6.3)
#>  magrittr        1.5     2014-11-22 [1] CRAN (R 3.6.0)
#>  MASS            7.3-52  2020-08-18 [1] CRAN (R 3.6.3)
#>  Matrix          1.2-18  2019-11-27 [1] CRAN (R 3.6.3)
#>  memoise         1.1.0   2017-04-21 [1] CRAN (R 3.6.3)
#>  mlr3          * 0.5.0   2020-08-07 [1] CRAN (R 3.6.3)
#>  mlr3filters   * 0.3.0   2020-07-18 [1] CRAN (R 3.6.3)
#>  mlr3learners  * 0.3.0   2020-08-29 [1] CRAN (R 3.6.3)
#>  mlr3measures    0.2.0   2020-06-27 [1] CRAN (R 3.6.3)
#>  mlr3misc        0.5.0   2020-08-13 [1] CRAN (R 3.6.3)
#>  mlr3pipelines * 0.2.1   2020-08-18 [1] CRAN (R 3.6.3)
#>  mlr3tuning    * 0.2.0   2020-07-28 [1] CRAN (R 3.6.3)
#>  mlr3verse     * 0.1.3   2020-07-06 [1] CRAN (R 3.6.3)
#>  mlr3viz       * 0.4.0   2020-10-05 [1] CRAN (R 3.6.3)
#>  munsell         0.5.0   2018-06-12 [1] CRAN (R 3.6.3)
#>  paradox       * 0.4.0   2020-07-21 [1] CRAN (R 3.6.3)
#>  pillar          1.4.6   2020-07-10 [1] CRAN (R 3.6.3)
#>  pkgbuild        1.1.0   2020-07-13 [1] CRAN (R 3.6.3)
#>  pkgconfig       2.0.2   2018-08-16 [1] CRAN (R 3.6.0)
#>  pkgload         1.1.0   2020-05-29 [1] CRAN (R 3.6.3)
#>  prettyunits     1.0.2   2015-07-13 [1] CRAN (R 3.6.0)
#>  processx        3.4.4   2020-09-03 [1] CRAN (R 3.6.3)
#>  ps              1.3.4   2020-08-11 [1] CRAN (R 3.6.3)
#>  purrr           0.3.4   2020-04-17 [1] CRAN (R 3.6.3)
#>  R6              2.4.1   2019-11-12 [1] CRAN (R 3.6.3)
#>  ranger          0.12.1  2020-01-10 [1] CRAN (R 3.6.3)
#>  Rcpp            1.0.1   2019-03-17 [1] CRAN (R 3.6.0)
#>  remotes         2.2.0   2020-07-21 [1] CRAN (R 3.6.3)
#>  rlang           0.4.7   2020-07-09 [1] CRAN (R 3.6.3)
#>  rmarkdown       1.12    2019-03-14 [1] CRAN (R 3.6.0)
#>  rprojroot       1.3-2   2018-01-03 [1] CRAN (R 3.6.0)
#>  scales          1.1.1   2020-05-11 [1] CRAN (R 3.6.3)
#>  sessioninfo     1.1.1   2018-11-05 [1] CRAN (R 3.6.3)
#>  stringi         1.4.3   2019-03-12 [1] CRAN (R 3.6.0)
#>  stringr         1.4.0   2019-02-10 [1] CRAN (R 3.6.0)
#>  testthat        2.3.2   2020-03-02 [1] CRAN (R 3.6.3)
#>  tibble          3.0.4   2020-10-12 [1] CRAN (R 3.6.3)
#>  tidyselect      1.1.0   2020-05-11 [1] CRAN (R 3.6.3)
#>  usethis         1.6.3   2020-09-17 [1] CRAN (R 3.6.3)
#>  uuid            0.1-4   2020-02-26 [1] CRAN (R 3.6.3)
#>  vctrs           0.3.4   2020-08-29 [1] CRAN (R 3.6.3)
#>  withr           2.3.0   2020-09-22 [1] CRAN (R 3.6.3)
#>  xfun            0.6     2019-04-02 [1] CRAN (R 3.6.0)
#>  xgboost         1.2.0.1 2020-09-02 [1] CRAN (R 3.6.3)
#>  yaml            2.2.0   2018-07-25 [1] CRAN (R 3.6.0)
#> 
#> [1] C:/Users/mshey/Anaconda3/envs/rstudio/lib/R/library
Axeman
  • 32,068
  • 8
  • 81
  • 94
Sheykhmousa
  • 139
  • 9

1 Answer

1

Looks fine to me. Note that the `multicore` plan is not available on Windows (it relies on forking) and falls back to sequential processing there; on Windows, use `plan(multisession)` instead. Might this be the culprit here?

PS: Next time you face a parallelization/runtime issue, benchmarking the runtime might help ;)

library(mlr3verse)
#> Loading required package: mlr3
#> Loading required package: mlr3filters
#> Loading required package: mlr3learners
#> Loading required package: mlr3pipelines
#> Loading required package: mlr3tuning
#> Loading required package: mlr3viz
#> Loading required package: paradox
library(future.apply)
#> Loading required package: future
library(future)
library(lgr)

# Silence mlr3's per-iteration INFO messages so only results are printed.
mlr3_logger <- lgr::get_logger("mlr3")
mlr3_logger$set_threshold("fatal")

# Task: the built-in "sonar" classification task, stratified on its target.
tsk_clf <- tsk("sonar")
tsk_clf$col_roles$stratum <- tsk_clf$target_names

# Level-1 base learners and the level-2 meta learner, all predicting labels.
lda <- lrn("classif.lda", predict_type = "response")
svm <- lrn(
  "classif.svm",
  type = "C-classification",
  kernel = "radial",
  predict_type = "response"
)
xgb <- lrn("classif.xgboost", predict_type = "response")
ranger_lrn <- lrn(
  "classif.ranger",
  predict_type = "response",
  importance = "permutation"
)

# Level 1: the three base learners in parallel branches, each wrapped in a
# PipeOpLearnerCV so it is resampled internally during training.
base_learner_ops <- list(
  PipeOpLearnerCV$new(lda, id = "lda_cv_l1"),
  PipeOpLearnerCV$new(svm, id = "svm_cv_l1"),
  PipeOpLearnerCV$new(xgb, id = "xgb_cv_l1")
)
level_1 <- gunion(base_learner_ops)

# Level 2: union the three level-1 outputs and pass them to ranger.
feature_union <- PipeOpFeatureUnion$new(3, id = "u2")
meta_learner <- PipeOpLearner$new(ranger_lrn, id = "ranger_l2")
level_2 <- level_1 %>>% feature_union %>>% meta_learner

# Expose the complete stacking graph as one learner.
lrn <- GraphLearner$new(level_2)

# Benchmark the same train/predict/score run under two future plans.
# The clock is restarted before each run so the two timings are independent.

# parallel
# NOTE: multicore forks the R process and is not available on Windows, where
# future falls back to sequential processing.
plan(multicore)
time <- Sys.time()
lrn$
  train(tsk_clf)$
  predict(tsk_clf)$
  score()
#> classif.ce 
#> 0.01923077
Sys.time() - time
#> Time difference of 2.994049 secs

# sequential
plan(sequential)
# Restart the clock; otherwise the difference below would also include the
# parallel run above.
time <- Sys.time()
lrn$
  train(tsk_clf)$
  predict(tsk_clf)$
  score()
#> classif.ce 
#> 0.01923077
Sys.time() - time
#> Time difference of 4.276779 secs
# NOTE(review): the recorded output above predates the clock restart. It was
# measured from the start of the parallel run, so it is cumulative; the
# sequential run alone took approximately 4.276779 - 2.994049 = 1.28 secs in
# that session. Re-run the reprex to refresh this value.

Created on 2020-12-20 by the reprex package (v0.3.0)

Session info
devtools::session_info()
#> ─ Session info ───────────────────────────────────────────────────────────────
#>  setting  value                                      
#>  version  R version 4.0.3 Patched (2020-12-10 r79607)
#>  os       macOS Big Sur 10.16                        
#>  system   x86_64, darwin17.0                         
#>  ui       X11                                        
#>  language (EN)                                       
#>  collate  en_US.UTF-8                                
#>  ctype    en_US.UTF-8                                
#>  tz       Europe/Berlin                              
#>  date     2020-12-20                                 
#> 
#> ─ Packages ───────────────────────────────────────────────────────────────────
#>  package       * version    date       lib source                          
#>  assertthat      0.2.1      2019-03-21 [1] CRAN (R 4.0.3)                  
#>  backports       1.2.1      2020-12-09 [1] CRAN (R 4.0.3)                  
#>  bbotk           0.2.2      2020-12-20 [1] Github (mlr-org/bbotk@5acf598)  
#>  callr           3.5.1      2020-10-13 [1] CRAN (R 4.0.3)                  
#>  checkmate       2.0.0      2020-02-06 [1] CRAN (R 4.0.3)                  
#>  class           7.3-17     2020-04-26 [2] CRAN (R 4.0.3)                  
#>  cli             2.2.0      2020-11-20 [1] CRAN (R 4.0.3)                  
#>  codetools       0.2-18     2020-11-04 [2] CRAN (R 4.0.3)                  
#>  colorspace      2.0-0      2020-11-11 [1] CRAN (R 4.0.3)                  
#>  crayon          1.3.4      2017-09-16 [1] CRAN (R 4.0.3)                  
#>  data.table      1.13.4     2020-12-08 [1] CRAN (R 4.0.3)                  
#>  desc            1.2.0      2018-05-01 [1] CRAN (R 4.0.3)                  
#>  devtools        2.3.2      2020-09-18 [1] CRAN (R 4.0.3)                  
#>  digest          0.6.27     2020-10-24 [1] CRAN (R 4.0.3)                  
#>  dplyr           1.0.2      2020-08-18 [1] CRAN (R 4.0.3)                  
#>  e1071           1.7-4      2020-10-14 [1] CRAN (R 4.0.3)                  
#>  ellipsis        0.3.1      2020-05-15 [1] CRAN (R 4.0.3)                  
#>  evaluate        0.14       2019-05-28 [1] CRAN (R 4.0.3)                  
#>  fansi           0.4.1      2020-01-08 [1] CRAN (R 4.0.3)                  
#>  fs              1.5.0      2020-07-31 [1] CRAN (R 4.0.3)                  
#>  future        * 1.21.0     2020-12-10 [1] CRAN (R 4.0.3)                  
#>  future.apply  * 1.6.0      2020-07-01 [1] CRAN (R 4.0.3)                  
#>  generics        0.1.0      2020-10-31 [1] CRAN (R 4.0.3)                  
#>  ggplot2         3.3.2      2020-06-19 [1] CRAN (R 4.0.3)                  
#>  globals         0.14.0     2020-11-22 [1] CRAN (R 4.0.3)                  
#>  glue            1.4.2      2020-08-27 [1] CRAN (R 4.0.3)                  
#>  gtable          0.3.0      2019-03-25 [1] CRAN (R 4.0.3)                  
#>  highr           0.8        2019-03-20 [1] CRAN (R 4.0.3)                  
#>  htmltools       0.5.0      2020-06-16 [1] CRAN (R 4.0.3)                  
#>  knitr           1.30       2020-09-22 [1] CRAN (R 4.0.3)                  
#>  lattice         0.20-41    2020-04-02 [2] CRAN (R 4.0.3)                  
#>  lgr           * 0.4.1      2020-10-20 [1] CRAN (R 4.0.3)                  
#>  lifecycle       0.2.0      2020-03-06 [1] CRAN (R 4.0.3)                  
#>  listenv         0.8.0      2019-12-05 [1] CRAN (R 4.0.3)                  
#>  magrittr        2.0.1      2020-11-17 [1] CRAN (R 4.0.3)                  
#>  MASS            7.3-53     2020-09-09 [2] CRAN (R 4.0.3)                  
#>  Matrix          1.2-18     2019-11-27 [2] CRAN (R 4.0.3)                  
#>  memoise         1.1.0      2017-04-21 [1] CRAN (R 4.0.3)                  
#>  mlr3          * 0.9.0-9000 2020-12-20 [1] Github (mlr-org/mlr3@f9ac360)   
#>  mlr3filters   * 0.4.0      2020-11-10 [1] CRAN (R 4.0.3)                  
#>  mlr3learners  * 0.4.3      2020-12-08 [1] CRAN (R 4.0.3)                  
#>  mlr3measures    0.3.0      2020-10-05 [1] CRAN (R 4.0.3)                  
#>  mlr3misc        0.6.0      2020-11-17 [1] CRAN (R 4.0.3)                  
#>  mlr3pipelines * 0.3.2      2020-12-17 [1] CRAN (R 4.0.3)                  
#>  mlr3tuning    * 0.5.0      2020-12-07 [1] CRAN (R 4.0.3)                  
#>  mlr3verse     * 0.1.3      2020-07-06 [1] CRAN (R 4.0.3)                  
#>  mlr3viz       * 0.5.0      2020-11-02 [1] CRAN (R 4.0.3)                  
#>  munsell         0.5.0      2018-06-12 [1] CRAN (R 4.0.3)                  
#>  paradox       * 0.6.0-9000 2020-12-20 [1] Github (mlr-org/paradox@99cfc0f)
#>  parallelly      1.22.0     2020-12-13 [1] CRAN (R 4.0.3)                  
#>  pillar          1.4.7      2020-11-20 [1] CRAN (R 4.0.3)                  
#>  pkgbuild        1.2.0      2020-12-15 [1] CRAN (R 4.0.3)                  
#>  pkgconfig       2.0.3      2019-09-22 [1] CRAN (R 4.0.3)                  
#>  pkgload         1.1.0      2020-05-29 [1] CRAN (R 4.0.3)                  
#>  prettyunits     1.1.1      2020-01-24 [1] CRAN (R 4.0.3)                  
#>  processx        3.4.5      2020-11-30 [1] CRAN (R 4.0.3)                  
#>  ps              1.5.0      2020-12-05 [1] CRAN (R 4.0.3)                  
#>  purrr           0.3.4      2020-04-17 [1] CRAN (R 4.0.3)                  
#>  R6              2.5.0      2020-10-28 [1] CRAN (R 4.0.2)                  
#>  ranger          0.12.1     2020-01-10 [1] CRAN (R 4.0.3)                  
#>  Rcpp            1.0.5      2020-07-06 [1] CRAN (R 4.0.3)                  
#>  remotes         2.2.0      2020-07-21 [1] CRAN (R 4.0.3)                  
#>  rlang           0.4.9      2020-11-26 [1] CRAN (R 4.0.3)                  
#>  rmarkdown       2.6        2020-12-14 [1] CRAN (R 4.0.3)                  
#>  rprojroot       2.0.2      2020-11-15 [1] CRAN (R 4.0.3)                  
#>  scales          1.1.1      2020-05-11 [1] CRAN (R 4.0.3)                  
#>  sessioninfo     1.1.1      2018-11-05 [1] CRAN (R 4.0.3)                  
#>  stringi         1.5.3      2020-09-09 [1] CRAN (R 4.0.3)                  
#>  stringr         1.4.0      2019-02-10 [1] CRAN (R 4.0.3)                  
#>  testthat        3.0.1      2020-12-20 [1] Github (r-lib/testthat@e99155a) 
#>  tibble          3.0.4      2020-10-12 [1] CRAN (R 4.0.3)                  
#>  tidyselect      1.1.0      2020-05-11 [1] CRAN (R 4.0.3)                  
#>  usethis         2.0.0.9000 2020-12-20 [1] Github (r-lib/usethis@c1e8ed6)  
#>  uuid            0.1-4      2020-02-26 [1] CRAN (R 4.0.3)                  
#>  vctrs           0.3.6      2020-12-17 [1] CRAN (R 4.0.3)                  
#>  withr           2.3.0      2020-09-22 [1] CRAN (R 4.0.3)                  
#>  xfun            0.19       2020-10-30 [1] CRAN (R 4.0.3)                  
#>  xgboost         1.2.0.1    2020-09-02 [1] CRAN (R 4.0.3)                  
#>  yaml            2.2.1      2020-02-01 [1] CRAN (R 4.0.3)                  
#> 
#> [1] /Users/pjs/Library/R/4.0/library
#> [2] /Library/Frameworks/R.framework/Versions/4.0/Resources/library
pat-s
  • 5,992
  • 1
  • 32
  • 60
  • I ran the same model as yours on a Linux machine (Ubuntu 20.04, R 4.0.3) with a different dataset (~180K samples, 100 features). Since producing a reprex is difficult in this case, I am reporting the runtimes instead: *Runtime 1 spcv* - `lda 9 sec`, - `xgb 4 sec`, - `svm 53673 sec` (~15 hours). It seems resampling doesn't run in parallel when it comes to `svm`. – Sheykhmousa Dec 22 '20 at 11:18
  • It was not clear in your question that you are having issues with a specific model. Your question and my answer benchmark the runtime of all models together and hence are of no help here. mlr3 has no influence on model fitting times. mlr3 parallelizes the different train/predict and tuning steps, which is well tested. Maybe your tuning range for SVM is too large? Everything else at this point is just speculation though. – pat-s Dec 23 '20 at 07:11
  • This question is answered - please do not conduct major edits here. If you come up with a new reproducible way to showcase your SVM specific issues, please create a new question. – pat-s Dec 23 '20 at 07:12
  • "Maybe your tuning range for SVM is too large?" No, I actually used the default parameters as they are (as also shown in the code above). `parallelSVM` does work in parallel; however, it runs into out-of-memory problems with my dataset, and it is not part of the mlr3 ecosystem. – Sheykhmousa Dec 23 '20 at 18:25
  • I am still confused what you mean by "works in parallel". mlr3 parallelizes everything it can do (if desired) using the {future} package. What happens on the algorithm level is a different thing. But even there, no SVM implementation should be so slow. SVM can get stuck if it cannot converge when trying to fit a model with certain hyperparameter settings. SVM always needs tuning and the default params should not be used as is (which applies to almost every algorithm). – pat-s Dec 24 '20 at 13:29
  • I posted a new question [HERE](https://stackoverflow.com/questions/65686780/different-runtime-for-svm-and-ranger-using-the-same-task) to make my point clear. – Sheykhmousa Jan 12 '21 at 15:21