0

I am using the R programming language. In a previous question (R Language: Storing Results of a Loop into a Table) I learned how to iterate a loop for fixed values of a variable "i":

  #load libraries
    
        library(caret)
        library(rpart)
    
    # Simulate 1000 observations: three independent normal predictors
        a <- rnorm(n = 1000, mean = 10, sd = 10)
        b <- rnorm(n = 1000, mean = 10, sd = 5)
        c <- rnorm(n = 1000, mean = 5, sd = 10)

        # Random two-level label ("A"/"B", equal probability) and a running id
        group <- sample(
          c("A", "B"),
          size = 1000,
          replace = TRUE,
          prob = c(0.5, 0.5)
        )
        group_1 <- seq_len(1000)

        # Assemble everything into one data frame; the label is kept as a factor
        d <- data.frame(a, b, c, group, group_1)
        d[["group"]] <- as.factor(d[["group"]])

 #start the loop


# Keep an untouched copy of the data; every iteration starts again from it.
e <- d

# Allocate the results table BEFORE the loop: one row per value of "i"
# (400:405 -> 6 rows). Column 1 receives the accuracy, column 2 the value of
# "i". Creating it after the loop fails on the first assignment in a fresh
# session, and otherwise overwrites the collected results with 1s.
final_table <- matrix(1, nrow = 6, ncol = 2)

# The resampling scheme does not depend on "i", so it is built only once:
# 10-fold cross-validation, repeated ten times.
fitControl <- trainControl(
  method = "repeatedcv",
  number = 10,
  repeats = 10
)

#here is the "i" variable

for (i in 400:405) {
  d <- e
  # Recode group_1 into a two-level factor: "1" when group_1 > i, else "0".
  d$group_1 <- as.factor(as.integer(d$group_1 > i))

  # 80/20 train/test split on the recoded outcome.
  trainIndex <- createDataPartition(
    d$group_1,
    p = 0.8,
    list = FALSE,
    times = 1
  )
  training <- d[trainIndex, ]
  test <- d[-trainIndex, ]

  TreeFit <- train(
    group_1 ~ .,
    data = training,
    method = "rpart2",
    trControl = fitControl
  )

  # Turn the probability of the second class ("1") into a hard label.
  # The levels are taken from the reference so the predicted factor always
  # carries both classes, even when one of them is never predicted.
  pred <- predict(TreeFit, test, type = "prob")
  labels <- factor(
    ifelse(pred[, 2] > 0.5, "1", "0"),
    levels = levels(test$group_1)
  )
  con <- confusionMatrix(labels, test$group_1)

  #update results into table
  # i runs over 400:405, so i - 399 maps it onto rows 1..6.
  row <- i - 399
  final_table[row, 1] <- con$overall[["Accuracy"]]
  final_table[row, 2] <- i
}

Now I am trying to replace the fixed range of "i" with a vector of random numbers: for (i in sample(100:400, 10))

However, this returns the following error (note: I changed final_table = matrix(1, nrow = 6, ncol=2) to final_table = matrix(1, nrow = 100, ncol=2)) :

Error in na.fail.default(list(group_1 = c(NA_integer_, NA_integer_, NA_integer_,  : 
  missing values in object

Can someone please show me what I am doing wrong? Is there an easier way to store all results from the loop into a matrix (or a table) without explicitly defining the number of rows required? Can the computer automatically keep adding a new row for every new value of "i"?

Thanks

stats_noob
  • 5,401
  • 4
  • 27
  • 83

1 Answer

1

To use random numbers, you can update your code as follows:

# Three independent normal predictors, 1000 draws each
a <- rnorm(1000, mean = 10, sd = 10)
b <- rnorm(1000, mean = 10, sd = 5)
c <- rnorm(1000, mean = 5, sd = 10)

# Two-level label drawn with equal probability, plus a running row id
group <- sample(LETTERS[1:2], size = 1000, replace = TRUE, prob = rep(0.5, 2))
group_1 <- seq_len(1000L)

# Collect the columns in a data frame and store the label as a factor
d <- data.frame(a, b, c, group, group_1)
d$group <- as.factor(d$group)

#start the loop
# Keep an untouched copy of the data; every iteration starts again from it.
e <- d

#here is the "i" variable: 10 random cut-offs drawn from 100:400
vec <- sample(100:400, 10)

#place results in table
# One row per cut-off, sized from vec so the two can never get out of step.
# Column 1 receives the accuracy, column 2 the cut-off that produced it.
final_table <- matrix(1, nrow = length(vec), ncol = 2)

# The resampling scheme is the same for every cut-off, so it is built once:
# 10-fold cross-validation, repeated ten times.
fitControl <- trainControl(
  method = "repeatedcv",
  number = 10,
  repeats = 10
)

for (i in seq_along(vec)) {
  d <- e
  # Recode group_1 into a two-level factor: "1" when group_1 > vec[i].
  d$group_1 <- as.factor(as.integer(d$group_1 > vec[i]))

  # 80/20 train/test split on the recoded outcome.
  trainIndex <- createDataPartition(
    d$group_1,
    p = 0.8,
    list = FALSE,
    times = 1
  )
  training <- d[trainIndex, ]
  test <- d[-trainIndex, ]

  TreeFit <- train(
    group_1 ~ .,
    data = training,
    method = "rpart2",
    trControl = fitControl
  )

  # Turn the probability of the second class ("1") into a hard label.
  # The levels are taken from the reference so the predicted factor always
  # carries both classes, even when one of them is never predicted.
  pred <- predict(TreeFit, test, type = "prob")
  labels <- factor(
    ifelse(pred[, 2] > 0.5, "1", "0"),
    levels = levels(test$group_1)
  )
  con <- confusionMatrix(labels, test$group_1)

  #update results into table
  final_table[i, 1] <- con$overall[["Accuracy"]]
  final_table[i, 2] <- vec[i]
}
Ronak Shah
  • 377,200
  • 20
  • 156
  • 213
  • Thank you for your answer! Do you know if it's possible to use the caret::confusionMatrix() function with more than 2 classes? I figured out how to do it manually, but is there a built-in function for this? https://stackoverflow.com/questions/65972553/r-multiclass-matrices Thank you – stats_noob Jan 30 '21 at 23:06