0

I am attempting to write code for a one-parameter logistic model in R. I have a large df of country-year observations from 1946 to 2021. Here is the code I have written thus far:

# read data
data <- read.csv("powers.csv")

# write a function for 1PL IRT model
# Define the logistic function
logistic <- function(theta, b) {
  exp(b*theta) / (1 + exp(b*theta))
}

# Define the likelihood function for the 1PL IRT model
irt.likelihood <- function(theta, b, y) {
  p <- logistic(theta, b)
  likelihood <- prod(p^y * (1-p)^(1-y))
  return(likelihood)
}

# Fit the 1PL IRT model using maximum likelihood estimation
fit.irt <- function(y, b.init) {
  mle <- optim(par=b.init, fn=function(b) -sum(log(irt.likelihood(theta, b, y))),
               method="BFGS", hessian=TRUE)
  return(mle)
}

# Step 1: Prepare the data data

# Drop X column (artifact of way CSV files are saved in R)
data <- data %>%
  dplyr::select(-X)

# I have a data frame called "data" with the following columns:
# "year", "country_name", "v2exremhsp_ord", "v2exdfdshs_ord", ..., "v2lgwarlaw"

# Step 2: Define theta and y
# Create a separate theta for each country
theta <- rep(NA, nrow(data))

# Define y as a matrix with binary response variables
y <-
  as.matrix(data[, c(
    "v2exremhsp_ord",
    "v2exdfdshs_ord",
    "v2exdfvths_ord",
    "v2exdfpphs_ord",
    "v2lgqstexp_ord",
    "v2lginvstp_ord",
    "v2lgoppart_ord",
    "v2lgfunds_ord",
    "v2lgcrrpt_ord",
    "v2lglegpup_ord",
    "v2lglegplo_ord",
    "v2exaphogp",
    "v2lgcomslo_ord",
    "v2lgsrvlo_ord",
    "v2lgstafflo_ord",
    "v2lgintbup",
    "v2exaphos",
    "v2ex_legconhos",
    "v2lgintblo",
    "v2lgtreaty",
    "v2lgwarlaw"
  )])

# Step 3: Estimate difficulty
# Define a log-likelihood function for the one-parameter logistic model
loglik <- function(b, y, theta) {
  p <- 1 / (1 + exp(-b * theta))
  p <- clamp(p,0,1)
  ll <- sum(y * log(p) + (1 - y) * log(1 - p))
  return(-ll)
}

# Initialize b.init
b.init <- rep(NA, 21)

# Estimate the difficulty for each item using optim()
for(i in 1:21){
  fit <- optim(par = b.init[i], fn = loglik, y = y[,i], theta = theta, method = "CG")
  b.init[i] <- fit$par
}

When I attempt to # Estimate the difficulty for each item using optim(), I receive the following error: Error in optim(par = b.init[i], fn = loglik, y = y[, i], theta = theta, : non-finite value supplied by optim. What would be the best way to deal with this issue?

Here is a sample of my data:

structure(list(year = 1946:1951, country_name = c("Mexico", "Mexico", 
"Mexico", "Mexico", "Mexico", "Mexico"), v2exremhsp_ord = c(0L, 
0L, 0L, 0L, 0L, 0L), v2exdfdshs_ord = c(0L, 0L, 0L, 0L, 0L, 0L
), v2exdfvths_ord = c(1L, 1L, 1L, 1L, 1L, 1L), v2exdfpphs_ord = c(0L, 
0L, 0L, 0L, 0L, 0L), v2lgqstexp_ord = c(0L, 0L, 0L, 0L, 0L, 0L
), v2lginvstp_ord = c(0L, 0L, 0L, 0L, 0L, 0L), v2lgoppart_ord = c(0L, 
0L, 0L, 0L, 0L, 0L), v2lgfunds_ord = c(1L, 1L, 1L, 1L, 1L, 1L
), v2lgcrrpt_ord = c(0L, 0L, 0L, 0L, 0L, 0L), v2lglegpup_ord = c(1L, 
1L, 1L, 1L, 1L, 1L), v2lglegplo_ord = c(1L, 1L, 1L, 1L, 1L, 1L
), v2exaphogp = c(NA_integer_, NA_integer_, NA_integer_, NA_integer_, 
NA_integer_, NA_integer_), v2lgcomslo_ord = c(1L, 1L, 1L, 1L, 
1L, 1L), v2lgsrvlo_ord = c(0L, 0L, 0L, 0L, 0L, 0L), v2lgstafflo_ord = c(1L, 
1L, 1L, 1L, 1L, 1L), v2lgintbup = c(0L, 0L, 0L, 0L, 0L, 0L), 
    v2exaphos = c(NA_integer_, NA_integer_, NA_integer_, NA_integer_, 
    NA_integer_, NA_integer_), v2ex_legconhos = c(0L, 0L, 0L, 
    0L, 0L, 0L), v2lgintblo = c(1L, 1L, 1L, 1L, 1L, 1L), v2lgtreaty = c(1L, 
    1L, 1L, 1L, 1L, 1L), v2lgwarlaw = c(1L, 1L, 1L, 1L, 1L, 1L
    )), row.names = c(NA, 6L), class = "data.frame")
Phil
  • 7,287
  • 3
  • 36
  • 66
w5698
  • 159
  • 7

0 Answers0