0

I Used SMOTE and Tomek methods for imbalanced classes that I have. I'm trying to do boosted regression tree. It runs smoothly until I create the confusion matrix I have this error ( Error: data and reference should be factors with the same levels.

### SMOTE and Tomek
NOAA_SMOTE= read.csv("NOAA_SMOTE.csv", TRUE, ",")

train.index <- createDataPartition(NOAA_SMOTE$japon, p = .7, list = FALSE)
train <- NOAA_SMOTE[ train.index,]
test  <- NOAA_SMOTE[-train.index,]

tomek = ubTomek(train[,-1], train[,1])
model_train_tomek = cbind(tomek$X,tomek$Y)
names(model_train_tomek)[1] = "japon"

removed.index = tomek$id.rm

train$japon = as.factor(train$japon)
train_tomek = train[-removed.index,]

## SMOTE after tomek links 
traintomeksmote <- SMOTE(japon ~ ., train_tomek, perc.over = 2000,perc.under = 100)


fitControlSmoteTomek<- trainControl(## 10-fold CV
  method = "repeatedcv",
  number = 10,
  repeats = 3,
  ## Estimate class probabilities
  classProbs = TRUE,
  ## Evaluate performance using 
  ## the following function
  summaryFunction = twoClassSummary)

gbmGridSmoteTomek <-  expand.grid(interaction.depth = c(3,4, 5, 6), 
                        n.trees =  (1:30)*50,
                        shrinkage = c(0.1,0.001,0.75,0.0001),
                        n.minobsinnode = 10)


gbmFitNOAASMOTETomek <- caret::train (make.names(japon) ~ ., data = traintomeksmote, 
                         method = "gbm", 
                         trControl = fitControlSmoteTomek,
                         distribution = "bernoulli",  
                         verbose = FALSE, 
                         tuneGrid = gbmGridSmoteTomek,
                         bag.fraction=0.5, 
                         ## Specify which metric to optimize
                         metric = "ROC")




test$japon = as.factor(test$japon)
PredNOAASMOTETomek <- predict(gbmFitNOAASMOTETomek, newdata= test  ,type='prob')
cmSMOTETomekNOAA = confusionMatrix(PredNOAASMOTETomek , as.factor(test$japon), mode="everything")

part of the data 


[enter image description here](https://i.stack.imgur.com/jPgI9.png)

Hanan
  • 1
  • 1

0 Answers0