You can use `foreach`, which has the added advantage that it can be run in parallel if your CPU has multiple cores. Here is the non-parallel code:
library("iterators")
library("foreach")
library("FNN")
# Sequential k-NN sweep over several values of k using foreach/%do%.
# Produces:
#   results  - matrix of numeric class predictions, one column per k
#   sse      - sum of squared differences between the numeric labels and the
#              predictions, one entry per k (same order as k.values)
#   results1 - data frame version of `results` with columns named after k
data(iris3)

# First 25 rows of each species are used for training, the remaining 25 rows
# of each species for testing.
newdata.training <- rbind(iris3[1:25, , 1], iris3[1:25, , 2], iris3[1:25, , 3])
newdata.test <- rbind(iris3[26:50, , 1], iris3[26:50, , 2], iris3[26:50, , 3])
newdata.trainlabels <- factor(c(rep(1, 25), rep(2, 25), rep(3, 25)))
# The test set is built in the same species order with 25 rows per species,
# so its true labels have the same values as the training labels.
newdata.testlabels <- factor(c(rep(1, 25), rep(2, 25), rep(3, 25)))

k.values <- seq(3, 15, 1)

# Each iteration returns the numeric predictions for one k; cbind stacks them
# into a matrix with one column per element of k.values.
results <- foreach(i = iter(k.values), .combine = cbind) %do% {
  y_pred <- knn(train = newdata.training, test = newdata.test,
                cl = newdata.trainlabels, k = i, prob = TRUE)
  # Convert the factor of predicted labels back to the numeric label values.
  as.numeric(levels(y_pred)[y_pred])
}

# With a single k, foreach returns a plain vector; force a one-column matrix
# so the column-wise code below works for any length of k.values.
results <- as.matrix(results)

# Compute the SSE per column instead of writing sse[i - start] inside the
# loop: that index was only valid for k.values starting at 3 with step 1.
sse <- unname(colSums((as.numeric(newdata.testlabels) - results)^2))

results1 <- data.frame(results)
colnames(results1) <- k.values
Here is the parallel version:
# Parallel version
library("iterators")
library("foreach")
library("parallel")
library("doParallel")
library("FNN")
# Parallel k-NN sweep over several values of k using foreach/%dopar%.
# Produces:
#   results  - matrix of numeric class predictions, one column per k
#   sse      - sum of squared differences between the numeric labels and the
#              predictions, one entry per k (same order as k.values)
#   results1 - data frame version of `results` with columns named after k
data(iris3)

# First 25 rows of each species are used for training, the remaining 25 rows
# of each species for testing.
newdata.training <- rbind(iris3[1:25, , 1], iris3[1:25, , 2], iris3[1:25, , 3])
newdata.test <- rbind(iris3[26:50, , 1], iris3[26:50, , 2], iris3[26:50, , 3])
newdata.trainlabels <- factor(c(rep(1, 25), rep(2, 25), rep(3, 25)))
# The test set is built in the same species order with 25 rows per species,
# so its true labels have the same values as the training labels.
newdata.testlabels <- factor(c(rep(1, 25), rep(2, 25), rep(3, 25)))

# detectCores() can return NA when the core count is unknown; fall back to a
# single worker in that case.
num.cores <- detectCores()
if (is.na(num.cores)) {
  num.cores <- 1L
}
clusters <- makeCluster(num.cores)
registerDoParallel(clusters)

k.values <- seq(3, 15, 1)

# Each iteration returns the numeric predictions for one k; cbind stacks them
# into a matrix with one column per element of k.values. The loop body runs on
# the workers, so it must not rely on assigning to variables of this session:
# such assignments only change the worker's copy and are lost.
# tryCatch(finally = ...) stops the cluster even if an iteration fails.
results <- tryCatch(
  foreach(
    i = iter(k.values), .combine = cbind, .packages = c("FNN")
  ) %dopar% {
    y_pred <- knn(train = newdata.training, test = newdata.test,
                  cl = newdata.trainlabels, k = i, prob = TRUE)
    # Convert the factor of predicted labels back to the numeric label values.
    as.numeric(levels(y_pred)[y_pred])
  },
  finally = stopCluster(clusters)
)

# With a single k, foreach returns a plain vector; force a one-column matrix
# so the column-wise code below works for any length of k.values.
results <- as.matrix(results)

# Compute the SSE here, from the returned predictions, one value per column.
# This also avoids the sse[i - start] index, which was only valid for
# k.values starting at 3 with step 1.
sse <- unname(colSums((as.numeric(newdata.testlabels) - results)^2))

results1 <- data.frame(results)
colnames(results1) <- k.values
There are only a few differences between the non-parallel code and the parallel code. First, there are additional libraries to load. Second, you need to create and register the cluster of workers that will do the parallel computation (and stop the cluster when you are done). Third, `foreach` uses the `%dopar%` infix operator instead of `%do%`. Fourth, the `foreach` function needs the `.packages` parameter so that the FNN package (which provides `knn`) is loaded on each of the workers.