I'm trying to parallelize a loop that performs manual 10-fold cross-validation with a random forest (RF). However, when I run it, the PC runs out of memory and I'm forced to quit R. I don't understand what the problem might be; the code looks correct to me:
library(parallel);library(doParallel)
library(foreach)
# Resampling is done manually below, so caret must not resample on its own:
# method = "none" asks train() for a single fit on the data it is given.
# caret is referenced by namespace because this script only loads it on the
# workers, never in the master session.
control <- caret::trainControl(method = "none")

# Randomly assign every row to one of 10 folds (seeded for reproducibility).
# list = FALSE returns a vector of fold ids, one per row of `data`.
set.seed(123)
folds <- caret::createFolds(data$y, k = 10, list = FALSE)
# Assign by name instead of cbind() so that re-running the script overwrites
# the fold column rather than appending a second column called "folds".
data$folds <- folds

# One-column matrix that will hold the mean absolute error of each fold.
MAE <- matrix(NA, nrow = 10, ncol = 1)
colnames(MAE) <- c("MAE")
# Parallelized CV loop
#
# Every worker is a separate R process that receives its own copy of `data`
# and grows its own forest, so memory use scales with the number of workers.
# Cap the worker count by the available cores and by the number of folds;
# lower `max_workers` further if memory is still the limiting factor.
n_folds <- 10L
max_workers <- 7L
n_cores <- parallel::detectCores()
if (is.na(n_cores)) {
  n_cores <- 2L
}
n_workers <- max(1L, min(max_workers, n_cores - 1L, n_folds))

cl <- makeCluster(n_workers)
registerDoParallel(cl)

# Assignments made inside %dopar% only change the worker's private copy of an
# object, so each iteration *returns* its fold MAE and foreach combines the
# values into one numeric vector on the master.
fold_mae <- tryCatch(
  foreach(
    fold = seq_len(n_folds),
    .combine = c,
    .packages = c("caret", "randomForest")
  ) %dopar% {
    in_test <- data$folds == fold
    predictors <- setdiff(names(data), "folds")
    train_set <- data[!in_test, predictors, drop = FALSE]
    test_set <- data[in_test, predictors, drop = FALSE]

    set.seed(123)
    rf <- train(y ~ ., data = train_set, method = "rf", trControl = control)
    pred <- predict(object = rf, newdata = test_set, type = "raw")

    # Namespaced call: avoids shadowing mae() with a local variable and
    # returns only a scalar to the master.
    Metrics::mae(test_set$y, pred)
  },
  # Always release the workers, even when a fold fails; leaked worker
  # processes keep holding their memory.
  finally = stopCluster(cl)
)

# Store the per-fold errors in the result matrix on the master.
MAE[, "MAE"] <- fold_mae
Thank you for any suggestions!