I am attempting to write code for a one-parameter logistic model in R
. I have a large df
of country-year observations from 1946 to 2021. Here is the code I have written thus far:
# read data
data <- read.csv("powers.csv")
# write a function for 1PL IRT model
# Define the logistic function
logistic <- function(theta, b) {
exp(b*theta) / (1 + exp(b*theta))
}
# Define the likelihood function for the 1PL IRT model
irt.likelihood <- function(theta, b, y) {
p <- logistic(theta, b)
likelihood <- prod(p^y * (1-p)^(1-y))
return(likelihood)
}
# Fit the 1PL IRT model using maximum likelihood estimation
fit.irt <- function(y, b.init) {
mle <- optim(par=b.init, fn=function(b) -sum(log(irt.likelihood(theta, b, y))),
method="BFGS", hessian=TRUE)
return(mle)
}
# Step 1: Prepare the data data
# Drop X column (artifact of way CSV files are saved in R)
data <- data %>%
dplyr::select(-X)
# I have a data frame called "data" with the following columns:
# "year", "country_name", "v2exremhsp_ord", "v2exdfdshs_ord", ..., "v2lgwarlaw"
# Step 2: Define theta and y
# Create a separate theta for each country
theta <- rep(NA, nrow(data))
# Define y as a matrix with binary response variables
y <-
as.matrix(data[, c(
"v2exremhsp_ord",
"v2exdfdshs_ord",
"v2exdfvths_ord",
"v2exdfpphs_ord",
"v2lgqstexp_ord",
"v2lginvstp_ord",
"v2lgoppart_ord",
"v2lgfunds_ord",
"v2lgcrrpt_ord",
"v2lglegpup_ord",
"v2lglegplo_ord",
"v2exaphogp",
"v2lgcomslo_ord",
"v2lgsrvlo_ord",
"v2lgstafflo_ord",
"v2lgintbup",
"v2exaphos",
"v2ex_legconhos",
"v2lgintblo",
"v2lgtreaty",
"v2lgwarlaw"
)])
# Step 3: Estimate difficulty
# Define a log-likelihood function for the one-parameter logistic model
loglik <- function(b, y, theta) {
p <- 1 / (1 + exp(-b * theta))
p <- clamp(p,0,1)
ll <- sum(y * log(p) + (1 - y) * log(1 - p))
return(-ll)
}
# Initialize b.init
b.init <- rep(NA, 21)
# Estimate the difficulty for each item using optim()
for(i in 1:21){
fit <- optim(par = b.init[i], fn = loglik, y = y[,i], theta = theta, method = "CG")
b.init[i] <- fit$par
}
When I attempt to # Estimate the difficulty for each item using optim()
, I receive the following error: Error in optim(par = b.init[i], fn = loglik, y = y[, i], theta = theta, : non-finite value supplied by optim
. What would be the best way to deal with this issue?
Here is a sample of my data:
structure(list(year = 1946:1951, country_name = c("Mexico", "Mexico",
"Mexico", "Mexico", "Mexico", "Mexico"), v2exremhsp_ord = c(0L,
0L, 0L, 0L, 0L, 0L), v2exdfdshs_ord = c(0L, 0L, 0L, 0L, 0L, 0L
), v2exdfvths_ord = c(1L, 1L, 1L, 1L, 1L, 1L), v2exdfpphs_ord = c(0L,
0L, 0L, 0L, 0L, 0L), v2lgqstexp_ord = c(0L, 0L, 0L, 0L, 0L, 0L
), v2lginvstp_ord = c(0L, 0L, 0L, 0L, 0L, 0L), v2lgoppart_ord = c(0L,
0L, 0L, 0L, 0L, 0L), v2lgfunds_ord = c(1L, 1L, 1L, 1L, 1L, 1L
), v2lgcrrpt_ord = c(0L, 0L, 0L, 0L, 0L, 0L), v2lglegpup_ord = c(1L,
1L, 1L, 1L, 1L, 1L), v2lglegplo_ord = c(1L, 1L, 1L, 1L, 1L, 1L
), v2exaphogp = c(NA_integer_, NA_integer_, NA_integer_, NA_integer_,
NA_integer_, NA_integer_), v2lgcomslo_ord = c(1L, 1L, 1L, 1L,
1L, 1L), v2lgsrvlo_ord = c(0L, 0L, 0L, 0L, 0L, 0L), v2lgstafflo_ord = c(1L,
1L, 1L, 1L, 1L, 1L), v2lgintbup = c(0L, 0L, 0L, 0L, 0L, 0L),
v2exaphos = c(NA_integer_, NA_integer_, NA_integer_, NA_integer_,
NA_integer_, NA_integer_), v2ex_legconhos = c(0L, 0L, 0L,
0L, 0L, 0L), v2lgintblo = c(1L, 1L, 1L, 1L, 1L, 1L), v2lgtreaty = c(1L,
1L, 1L, 1L, 1L, 1L), v2lgwarlaw = c(1L, 1L, 1L, 1L, 1L, 1L
)), row.names = c(NA, 6L), class = "data.frame")