0

I am working on an imbalanced classification task, so I want to use the F-beta score as the performance measure. I am using the `mlr` package and set `measures = fbeta`, as follows:

library(mlr)
# Create tasks ----

## Create combined training data
# Training rows are stacked first and validation rows second, so the two index
# vectors below address disjoint row ranges of `train_task_data`.
train_data <- cbind(x_train, y_train)
valid_data <- cbind(x_valid, y_valid)

train_task_data <- rbind(train_data, valid_data)
size <- nrow(train_task_data)
train_ind <- seq_len(nrow(train_data))
validation_ind <- seq.int(max(train_ind) + 1, size)

## Create training task
# `positive` is documented as a character string naming one level of the
# target, so the class label is passed as "1" rather than the number 1.
# NOTE(review): assumes DEFAULT is a factor with levels "0" and "1" -- confirm.
train_task <- makeClassifTask(
  data = train_task_data, target = "DEFAULT", positive = "1"
)

## Create test task
# Declare the same positive class as the training task; if it is left unset,
# mlr chooses the positive class on its own and the two tasks can disagree.
testtask <- makeClassifTask(
  data = cbind(x_test, y_test), target = "DEFAULT", positive = "1"
)

# Create learner ----
# Fixed hyperparameters go through `par.vals` so that mlr validates them and
# merges them into the learner's existing settings. Assigning to
# `lrn$par.vals` directly replaces the whole list, which drops any defaults
# the learner constructor had set.
lrn <- makeLearner(
  "classif.xgboost",
  predict.type = "response", # use "prob" to obtain class probabilities
  par.vals = list(
    objective = "binary:logistic",
    eval_metric = "logloss",
    nrounds = 100L,
    eta = 0.1
  )
)

# Set parameter space ----
# Search space explored by the tuner; bounds of numeric parameters are written
# as doubles, bounds of integer parameters as integers.
params <- makeParamSet(
  makeDiscreteParam("booster", values = c("gbtree", "gblinear")),
  makeIntegerParam("max_depth", lower = 9L, upper = 10L),
  makeNumericParam("min_child_weight", lower = 9, upper = 10),
  makeNumericParam("subsample", lower = 0.9, upper = 1),
  makeNumericParam("colsample_bytree", lower = 0.9, upper = 1)
)

# Search strategy ----
# Random search with 10 evaluated configurations.
ctrl <- makeTuneControlRandom(maxit = 10L)

# Set parallel backend ----
library(parallel)
library(parallelMap)
parallelStartSocket(cpus = detectCores())

# Tune on a fixed holdout split: fit on `train_ind`, evaluate on
# `validation_ind`.
# NOTE(review): `fbeta` is not an object exported by mlr; it has to be created
# with makeMeasure() before this call, otherwise tuneParams() stops with
# "object 'fbeta' not found". mlr's built-in `f1` covers the beta = 1 case.
mytune <- tuneParams(
  learner = lrn, task = train_task,
  resampling = makeFixedHoldoutInstance(train_ind, validation_ind, size),
  measures = fbeta, par.set = params, control = ctrl, show.info = TRUE
)

# Shut the socket workers down once tuning is finished so they do not linger
# for the rest of the session.
parallelStop()

# Apply tuned hyperparameters ----
# `mytune$x` holds the best configuration found by tuneParams().
lrn_tune <- setHyperPars(lrn, par.vals = mytune$x)

# Train model ----
# The final model is fitted on the full training task.
xgmodel <- train(learner = lrn_tune, task = train_task)

# Predict ----
xgpred <- predict(xgmodel, task = testtask)

# Confusion matrix ----
# Use mlr's own helper on the prediction object: it needs no extra package and
# reads truth and response from the prediction itself, so the two cannot be
# passed in the wrong order.
calculateConfusionMatrix(xgpred)

However, the following error is reported: `Error in checkMeasures(measures, learner) : object 'fbeta' not found`. In addition, my dataset contains 150,000 instances, but the counts in the computed confusion matrix add up to fewer than 150,000.

> confusionMatrix(xgpred$data$response,xgpred$data$truth)
     [,1]   [,2]
[1,]    0      0
[2,]    0 149887

Update: My function to calculate the F-beta score is as follows, but I am not sure it is correct.

# F-beta measure for binary classification, for use with mlr.
#
# Computes
#   (1 + beta^2) * TP / (beta^2 * #(truth == positive) + #(response == positive))
# where the positive class is taken from the task description stored in the
# prediction. Larger values are better (best = 1, worst = 0).
#
# `beta` is supplied through `extra.args` and defaults to 1, which gives the
# same value as the previous hard-coded version. Another value can be set with
# e.g. setMeasurePars(fbeta, beta = 2).
fbeta <- makeMeasure(
  id = "fbeta", minimize = FALSE, best = 1, worst = 0,
  properties = c("classif", "req.pred", "req.truth"),
  name = "Fbeta measure",
  note = "Defined as: (1+beta^2) * tp/ (beta^2 * sum(truth == positive) + sum(response == positive))",
  extra.args = list(beta = 1),
  fun = function(task, model, pred, feats, extra.args) {
    # Fall back to beta = 1 if the measure was rebuilt without extra.args.
    beta <- if (is.null(extra.args$beta)) 1 else extra.args$beta
    beta_sq <- beta^2
    truth <- pred$data$truth
    response <- pred$data$response
    positive <- pred$task.desc$positive
    # The result is NaN when there are no positives in either truth or response.
    (1 + beta_sq) * measureTP(truth, response, positive) /
      (beta_sq * sum(truth == positive) + sum(response == positive))
  }
)
ebrahimi
  • 912
  • 2
  • 13
  • 32
  • There you go, you have not defined `fbeta`. The documentation sends you to https://rdrr.io/cran/mlr/man/getDefaultMeasure.html. – Roman Luštrik Dec 17 '22 at 07:31
  • Without your data, it's impossible to reproduce the error. I would also strongly suggest using [mlr3](https://mlr3.mlr-org.com), the successor of mlr. – Lars Kotthoff Dec 17 '22 at 17:56

0 Answers0