8

I am trying to adapt the glm method from caret::train to be used for a Generalized Linear Mixed Effects Model. I have used the following code to set-up the function:

`GLMERmod <- list(type="Classification", library="lme4", loop=NULL)

parameters <- data.frame(parameter="parameter", class="character", 
label="parameter")

GLMERmod$parameters <- parameters


grid <- function (x, y, len = NULL, search = "grid"){
  data.frame(parameter = "none")
 } 



 GLMERmod$grid <- grid


 fit <- 
 function (x, y, wts, param, lev, last, classProbs, ...) 
 {
    dat <- if (is.data.frame(x)) 
        x
    else as.data.frame(x)
    dat$.outcome <- y
    if (length(levels(y)) > 2) 
        stop("glm models can only use 2-class outcomes")
    theDots <- list(...)
    if (!any(names(theDots) == "family")) {
         theDots$family <- if (is.factor(y)) 
             binomial()
        else gaussian()
    }
    if (!is.null(wts)) 
        theDots$weights <- wts
    modelArgs <- c(list(formula = as.formula(".outcome ~ . + (1 | IDNO)"), 
        data = dat, control=glmerControl(optimizer="bobyqa"), nAGQ=10, 
theDots))
    out <- do.call("glmer", modelArgs)
    out$call <- NULL
    out
}

GLMERmod$fit <- fit



   predict <- 
    function (modelFit, newdata, submodels = NULL) 
    {
        if (!is.data.frame(newdata)) 
            newdata <- as.data.frame(newdata)
        if (modelFit$problemType == "Classification") {
            probs <- predict(modelFit, newdata, type = "response")
            out <- ifelse(probs < 0.5, modelFit$obsLevel[1], 
     modelFit$obsLevel[2])
        }
        else {
           out <- predict(modelFit, newdata, type = "response")
        }
        out
    }

   GLMERmod$predict <- predict

   prob <- 
    function (modelFit, newdata, submodels = NULL) 
    {
        if (!is.data.frame(newdata)) 
            newdata <- as.data.frame(newdata)
        out <- predict(modelFit, newdata, type = "response")
        out <- cbind(1 - out, out)
        dimnames(out)[[2]] <- modelFit$obsLevels
        out
    }

GLMERmod$prob <- prob



varImp <-
function (object, ...) 
{
    values <- summary(object)$coef
    varImps <- abs(values[-1, grep("value$", colnames(values))])
    out <- data.frame(varImps)
    colnames(out) <- "Overall"
    if (!is.null(names(varImps))) 
        rownames(out) <- names(varImps)
    out
}

GLMERmod$varImp <- varImp



predictors <-
function (x, ...) 
predictors(x$terms)

GLMERmod$predictors <- predictors



levels <- 
function (x) 
if (any(names(x) == "obsLevels")) x$obsLevels else NULL

GLMERmod$levels <- levels


trim <- 
function (x) 
{
    x$y = c()
    x$model = c()
    x$residuals = c()
    x$fitted.values = c()
    x$effects = c()
    x$qr$qr = c()
    x$linear.predictors = c()
    x$weights = c()
    x$prior.weights = c()
    x$data = c()
    x$family$variance = c()
    x$family$dev.resids = c()
    x$family$aic = c()
    x$family$validmu = c()
    x$family$simulate = c()
    attr(x$terms, ".Environment") = c()
    attr(x$formula, ".Environment") = c()
    x
}

GLMERmod$trim <- trim


sort <- 
function (x) 
x

GLMERmod$sort <- sort`

This is a simple adaptation of the code for the glm method. However, when I run my model (as below), I am receiving many error messages.

I have tried to use the basic glmer function, but have issues with scaling (train would make this easy to deal with as well as cross validation and model diagnostics).

> glmer1 <- glmer(Case.Status ~ . + (1 | IDNO), data=TB_Train.glmer,
family=binomial, control=glmerControl(optimizer="bobyqa"), nAGQ=10)
fixed-effect model matrix is rank deficient so dropping 21 columns / 
coefficients
Some predictor variables are on very different scales: consider 
rescalingmaxfun < 10 * length(par)^2 is not recommended.
Error in na.fail.default(list(Case.Status = c(2L, 2L, 2L, 2L, 1L, 1L, : 
missing values in object

Then, I try to use train with this code:

GLMER <- train(Case.Status ~ . + (1 | IDNO), data=TB_Train.glmer, 
method=GLMERmod, trControl=trainControl(method="none", classProbs = TRUE), 
preProc = c("center", "scale"),   metric="ROC")

and I get this error:

�|� not meaningful for factors
 Show Traceback
 Error in na.fail.default(list(Case.Status = c(2L, 2L, 2L, 2L, 1L, 1L, : 
 missing values in object

Convert the IDNO variable to numeric:

These variables have zero variances: 1 | IDNOTRUEthe condition has length > 1 
and only the first element will be usedfixed-effect model matrix is rank 
deficient so dropping 1 column / coefficient
 Show Traceback
Error: inherits(family, "family") is not TRUE

Spell out all variables to avoid zero variance warning, but still:

These variables have zero variances: 1 | IDNOTRUE
 Show Traceback
Error in model.frame.default(data = list(Alcohol.Use = c(2.60174716798288, : 
variable lengths differ (found for 'IDNO')

Anyone know where these errors are coming from and how I can fix them?

daileyco
  • 743
  • 5
  • 13

0 Answers0