0

I am trying to use mlr package to do classification using the following user defined function clasFunc . when I call the function using

 clasFunc("classif.lda")

I get an error message saying

Error in model.frame.default(Terms, newdata, na.action = na.pass, xlev = object$xlevels) : factor col1 has new levels 'new level'

I tried to fix that by merging factors of training and testing data using the code

 for(j in 1: ncol(train)){

   if(class(train[,j])=="factor"){
         lvls=   union(levels(train[,j]), levels(test[,j]))
         levels(train[,j]) =lvls
         levels(test[,j]) =lvls

   }

 }

But it doesn't seem to work.

Warning in makeTask(type = type, data = data, weights = weights, blocking = blocking, : Empty factor levels were dropped for columns: col1,col2,col3,col4,col5

Here is my complete code.

clasFunc = function(clsnam){
try(

for( i in 1:5){

  print(paste0("fold ", i))
  train = read.csv(file =paste0("D:\\arff_csv_folds\\real_original\\train", i,".csv"))
  test = read.csv(file =paste0("D:\\arff_csv_folds\\real_original\\test", i,".csv"))

 for(j in 1: ncol(train)){

   if(class(train[,j])=="factor"){
         lvls=   union(levels(train[,j]), levels(test[,j]))
         levels(train[,j]) =lvls
         levels(train[,j]) =lvls

   }

 }

  trainTask <- makeClassifTask(data = train,target = "cls", positive = "yes")
  testTask <- makeClassifTask(data = test, target = "cls",  positive = "yes")

 Clslearn = makeLearner(clsnam, predict.type = "prob")

  trained <- train(Clslearn, trainTask)

  predicted <- predict(trained, testTask)

  print(paste0(clsnam, " fold ", i," test auc:",auc(predicted$data$truth, predicted$data$prob.yes)))

}
)
}

Here is the complete output

[1] "fold 1"

Warning in makeTask(type = type, data = data, weights = weights, blocking = blocking, : Empty factor levels were dropped for columns: col1,col2,col3,col4,col5

[1] "classif.lda fold 1 test auc:0.673604162894944"

[1] "fold 2"

Warning in makeTask(type = type, data = data, weights = weights, blocking = blocking, : Empty factor levels were dropped for columns: col1,col2,col3,col4,col5

[1] "classif.lda fold 2 test auc:0.686717528654292"

[1] "fold 3"

Warning in makeTask(type = type, data = data, weights = weights, blocking = blocking, : Empty factor levels were dropped for columns: col1,col2,col3,col4,col5

Timing stopped at: 0 0 0

Error in model.frame.default(Terms, newdata, na.action = na.pass, xlev = object$xlevels) : factor col1 has new levels 'new level'

How can I fix this?

SaikiHanee
  • 849
  • 5
  • 13
  • 22

0 Answers0