
I coded a regularized (penalized) multivariate logistic regression in R using a hand-written gradient descent function. It works fine, but the problem is that it is too slow. Is there a way to improve the speed by using an optimization function?

I looked at this code, but it does not use an optimization function: http://www.r-bloggers.com/machine-learning-ex5-2-regularized-logistic-regression/

Someone suggested that I use the optim function with the "BFGS" method, but I could not figure out how to write the cost function (J) and the gradient of the coefficients (gra) - especially the gradient part.
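
From the optim help page, my understanding is that it needs a function fn that returns a single cost value and a function gr that returns the gradient vector, roughly like this (cost.fn, grad.fn and theta0 are only placeholder names, not code I actually have):

# par: starting values for the coefficients
# fn : should return the (regularized) cost for a given coefficient vector
# gr : should return the gradient of that cost, one entry per coefficient
fit <- optim(par = theta0, fn = cost.fn, gr = grad.fn, method = "BFGS")
fit$par  # optimized coefficients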

Thanks in advance.

> #################################################
> # prepare dataset to solve logistic regression
> vY  <- c(1,1,1,1,1,0,0,0,0,0)

> vX1 <- c(1,3,5,2,7,6,9,7,8,9)
> vX2 <- c(1,0,0,0,0,0,0,2,1,1)
> mydata <- data.frame(y=vY,u=vX1,v=vX2)
> 
> #################################################
> # solve by hand written gradient
> vHiFeatures <- 1 #6
> la <- 0
> alpha <- 0.3
> 
> # plot the data
> plot(mydata$u[mydata$y == 0], mydata$v[mydata$y == 0], xlab="u", ylab="v")
> points(mydata$u[mydata$y == 1], mydata$v[mydata$y == 1], col="blue", pch=3)
> legend("topright", c("y=0","y=1"), pch=c(1, 3), col=c("black", "blue"), bty="n")
> 
> # build high order feature vector
> hi.features = function (f1,f2,deg) {
+   n = ncol(f1)
+   ma = matrix(rep(1,length(f1)))
+   for (i in 1:deg) {
+     for (j in 0:i)
+       ma = cbind(ma, f1^(i-j) * f2^j)
+   }
+   return(ma)
+ } 
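> # e.g. hi.features(f1, f2, 1) gives columns 1, f1, f2; deg = 2 adds f1^2, f1*f2, f2^2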
> # sigmoid function
> g <- function (z) {
+   return (1 / (1 + exp(-z) ))
+ } # plot(g(c(1,2,3,4,5,6)), type="l")
> h <- function (x,th) {
+   return(g(x %*% th))
+ } 
> # cost function
> J <- function (x,y,th,m,la) {
+   pt = th
+   pt[1] = 0
+   A = (la/(2*m))* t(pt) %*% pt
+   return( -1/m * sum(y * log(h(x,th)) + (1 - y) * log(1 - h(x,th))) + A)
+ } 
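> # (pt[1] = 0 above keeps the intercept out of the regularization term A)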
> # gradient 
> grad <- function (x,y,th,m,la=1,al=1) {
+   G = (la/m * th)
+   G[1,] = 0
+   th <- th - al* ((1/m * t(x) %*% (h(x,th) - y)) +  G)
+   th
+ } 
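> # note: grad() returns the updated theta after one descent step of size al,
> # not the gradient vector itself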
> 
> m = length(mydata$u) # samples
> x = hi.features(mydata$u, mydata$v, vHiFeatures)
> n = ncol(x) # features
> y = matrix(mydata$y, ncol=1)
> 
> # use the cost function to check it works
> th1 <- matrix(0,n)
> jiter = array(0,c(100000,1))
> for (i in 1:100000) {
+   jiter[i] = J(x,y,th1,m,la)
+   # th1 = th1 - solve(H(x,y,th1,m,la)) %*% grad(x,y,th1,m,la) 
+   th1 <- grad(x,y,th1,m,la,alpha) 
+ }
> # check that it is converging correctly
> plot(jiter, xlab="iterations", ylab="cost J", type="l")
> th1
          [,1]
[1,]  7.447780
[2,] -1.116073
[3,] -2.708777
> 
> #################################################
> # solve by glm function
> dTemp <- data.frame(y=y,x)
> # oModel <- glm(as.factor(y)~., dTemp, family=binomial(link=logit))
> oModel <- glm(as.factor(y)~., dTemp[,-2], family="binomial")
> oModel

Call:  glm(formula = as.factor(y) ~ ., family = "binomial", data = dTemp[, -2])

Coefficients:
(Intercept)           X2           X3  
      7.448       -1.116       -2.709  

Degrees of Freedom: 9 Total (i.e. Null);  7 Residual
Null Deviance:       13.86 
Residual Deviance: 4.633     AIC: 10.63
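
For reference, this is my current guess at how the cost and gradient above could be rewritten for optim with method="BFGS". It reuses J, h, x, y, n, m and la as defined in the session above; cost.optim, grad.optim and fit are my own names, and the gradient is just the bracketed term inside grad() without the update step, so please correct me if this is wrong:

# cost: same J as above, but with theta as the first argument so optim can vary it
cost.optim <- function (th, x, y, m, la) {
  as.numeric(J(x, y, matrix(th, ncol = 1), m, la))
}
# gradient of the regularized cost (a vector, not an updated theta)
grad.optim <- function (th, x, y, m, la) {
  th <- matrix(th, ncol = 1)
  G <- (la / m) * th
  G[1, ] <- 0                          # do not penalize the intercept
  as.vector((1 / m) * t(x) %*% (h(x, th) - y) + G)
}
fit <- optim(par = rep(0, n), fn = cost.optim, gr = grad.optim,
             method = "BFGS", x = x, y = y, m = m, la = la)
fit$par   # should be comparable to th1 and the glm coefficients above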