0

I am trying to estimate parameters in a beta regression using optim in R. I can do so if I parameterize the model with mu and phi as is done in the package betareg. That optim code is shown below. However, I cannot estimate alpha and beta directly with optim. I have tried maybe a half-doze variations of the code at the bottom. I suspect this is more of a coding issue than a statistical issue, but can move this to Cross Validated if requested.

First I create simulated data with 1000 samples of the proportion my.survival in each of 2 years. I model my.survival as a function of year:

set.seed(1234)

# create data
year     <-  1:2
years    <- length(year)  # number of years
n.trials <-  1000         # samples per year

# define parameters in linear predictor
B0       <-  0.7
B1       <- -0.05

# obtain mu and phi
my.means <- exp(B0 + B1 * year) / (1 + exp(B0 + B1 * year))
my.means

my.phi <- 100

# convert mu and phi into annual alpha and beta
ab.fun <- function(mu, phi)
{
  a <- mu * phi
  b <- phi - mu * phi
  ab <- data.frame(a, b)
  return(ab)
}

alphabeta <- ab.fun(my.means, my.phi)
alphabeta
#          a        b
# 1 65.70105 34.29895
# 2 64.56563 35.43437

my.alpha <- alphabeta$a
my.beta  <- alphabeta$b

# obtain random samples of annual proportion surviving
my.survival <- rep(NA, (years*n.trials))
my.year     <- rep(NA, (years*n.trials))

iter <- 1

for(i in 1:years) {
     for(j in 1:n.trials) {
          my.survival[iter] <- rbeta(1, my.alpha[i], my.beta[i])
          my.year[iter] <- i
          iter <- iter + 1
     }
}

head(my.survival)
head(my.year)

Then I estimate the parameters with the betareg package:

library(betareg)

gy <- betareg(my.survival ~ my.year)
summary(gy)
# Call:
# betareg(formula = my.survival ~ my.year)
# 
# Standardized weighted residuals 2:
#     Min      1Q  Median      3Q     Max 
# -3.4235 -0.6520 -0.0099  0.6561  3.5155 
# 
# Coefficients (mean model with logit link):
#              Estimate Std. Error z value Pr(>|z|)    
# (Intercept)  0.721712   0.014738   48.97  < 2e-16 ***
# my.year     -0.067093   0.009293   -7.22 5.19e-13 ***
# 
# Phi coefficients (precision model with identity link):
#       Estimate Std. Error z value Pr(>|z|)    
# (phi)   100.69       3.17   31.77   <2e-16 ***
# ---
# Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 
# 
# Type of estimator: ML (maximum likelihood)
# Log-likelihood:  3268 on 3 Df
# Pseudo R-squared: 0.02544
# Number of iterations: 8 (BFGS) + 2 (Fisher scoring) 
# Warning message:
# In deparse(x$call, width.cutoff = floor(getOption("width") * 0.85)) :
#   invalid 'cutoff' value for 'deparse', using default

I obtain virtually the same estimates of B0, B1 and phi with optim:

# estimate parameters with optim
beta.reg = function(betas, my.survival, my.year){

     phi   = betas[1]
     B0    = betas[2]
     B1    = betas[3]

     n <- length(my.survival)
     llh <- rep(0, n)

     for(i in 1:n){

          mu <- exp(B0 + B1 * my.year[i]) / (1 + exp(B0 + B1 * my.year[i]))

          alpha <- mu * phi
          beta  <- phi - mu * phi

          llh[i] = llh[i] + dbeta(my.survival[i], alpha, beta, log = TRUE)
     }

     -sum(llh)
}

alpha.init <- 2
beta.init  <- 2
phi.init   <- 25
B0.init    <- 0
B1.init    <- 0

my.model <- optim(c(phi.init, B0.init, B1.init), 
                 beta.reg, my.survival = my.survival, my.year = my.year, method = "BFGS", hessian=TRUE)


my.model$par
#[1] 100.71561736   0.72166040  -0.06705041

my.model$value
#[1] -3268.381

covmat <- solve(my.model$hessian)

mySE <- diag(covmat)^0.5
mySE
#[1] 3.170301989 0.014736516 0.009291484

However, I cannot estimate the annual alpha's and beta's with optim. I have tried several variations of the code below. In the version of the code below I simplied the formulae of mu by hardcoding the values of year. I know I can derive alpha and beta from the estimates of mu and phi from the optim code above and use the delta method to obtain SE's. However, I thought it would be more convenient and ultimately useful to estimate alpha and beta directly with optim if possible.

I wonder whether the issue is optim does not allow derived parameters and below alpha and beta are derived from phi, B0 and B1?

# estimate parameters with optim
beta.reg = function(betas, my.survival, my.year){

     B0           = betas[1]
     B1           = betas[2]
     alpha1       = betas[3]
     alpha2       = betas[4]
     beta1        = betas[5]
     beta2        = betas[6]
     phi          = betas[7]

     n <- length(my.survival)
     llh <- rep(0, n)

     # here I try to simplify by hardcoding the values of year to 1:2
     mu1  <- exp(B0 + B1 * 1)  / (1 + exp(B0 + B1 * 1))
     mu2  <- exp(B0 + B1 * 2)  / (1 + exp(B0 + B1 * 2))

     alpha1   = mu1 * phi
     beta1    = phi - mu1 * phi
     alpha2   = mu2 * phi
     beta2    = phi - mu2 * phi

     iter <- 1

     for(i in 1:years){
          for(i in 1:n.trials){
               if(i ==  1) llh[iter] = llh[iter] + dbeta(my.survival[iter], alpha1, beta1, log = TRUE)
               if(i ==  2) llh[iter] = llh[iter] + dbeta(my.survival[iter], alpha2, beta2, log = TRUE)
               iter <- iter + 1
          }
     }
     -sum(llh)
}

B0.init    <- 0
B1.init    <- 0
alpha.init <- rep(2, years)
beta.init  <- rep(2, years)
phi.iter   <- 100

my.model <- optim(c(B0.init, B1.init, alpha.init, beta.init, phi.iter), 
                 beta.reg, my.survival = my.survival, my.year = my.year, method = "BFGS", hessian=TRUE)


my.model$par
#[1]   1.0013198  -0.2096154   2.0000000   2.0000000   2.0000000   2.0000000 338.7167214

my.model$value
#[1] -9.090913

covmat <- solve(my.model$hessian)
# Error in solve.default(my.model$hessian) : 
#  Lapack routine dgesv: system is exactly singular: U[3,3] = 0

mySE <- diag(covmat)^0.5
mySE
Mark Miller
  • 12,483
  • 23
  • 78
  • 132
  • This is a lot. Can you make the example a little more compact, e.g. by using only two years rather than 10? – Ben Bolker Sep 02 '23 at 00:55
  • @BenBolker Thanks, Ben. I changed the code from 10 years to two years. I thought I might have to change `year` to an indicator variable, but it seems to work still as a continuous variable. – Mark Miller Sep 02 '23 at 19:13

0 Answers0