0

I want define a function for variable selection

Var_Selection = function(data, target){
  target = as.character(target)
  form = paste(target, '~ ')
  vect = c()
  valid.names <- names(data)[names(data) != "target"]
  myAIC = data.frame(var = as.character(), AIC = as.numeric())

  repeat{
    a=Sys.time()
    Perf = data.frame(variable = as.character(), AIC = as.numeric())

    for(i in 2:length(valid.names)){
      # Régression logistique pour chaque variable
      mylogit = glm(as.formula(paste(form, i), data=data, family="binomial"))
      # AIC pour chaque variable 
      Perf = rbind(Perf, 
                   data.frame(variable= i, 
                              AIC= extractAIC(mylogit)[2]))

    } 

    # Keep best variable 
    vect = c(vect,new_var)
    myAIC = rbind(myAIC,
                  data.frame(var = new_var,
                             AIC = Perf[Perf$variable == new_var,'AIC']))
    plot(myAIC[,2],type = 'b', ylab = 'AIC', main = new_var)
    print(new_var)


    b=Sys.time()
    print(b-a)


  }
  #mylogit = glm(as.formula(paste(form, paste(vect, collapse = ' + '))), data=data, family="binomial")
  #return(mylogit)
}

I test with df :

Marque  Cat  Usag  Cyl  Sin 
ACM     S    Pr    12   1
ADIVA   S    A     28   0 
BMW     M    M     192  0

Target variable is Sin

I have the following error : "error is as.formula(paste(form,i), data = data, family = "binomial) : unused arguments (data = data, family = "binomial")

Naï
  • 53
  • 6

0 Answers0