0

I am building a multivariate mixed model using lme4 (as I find its output simpler to understand). However, I can't get it to converge: the code takes forever to run. I started by melting 18 columns into 1 (trait). Then I tried to create a model with variable1 to explain ResponseValue. I have about 13 variables in my dataframe with 14000 observations, of which I want to include around 7 (fixed-effect) variables in the model at a time. Even when I start with the minimum, the code never finishes running.

I have tried running default optimizer (no settings) and removing variables from the model. It seems to help but still doesn't get anywhere.



# Multivariate mixed model: trait-specific slopes for variable1, no global
# intercept (trait:(variable1) - 1), and an unstructured among-trait
# variance-covariance matrix per individual ((trait - 1 | RandomID)).
multiVlm.trait.1stModel <- lmer(
  ResponseValue ~ trait:(variable1) - 1 + (trait - 1 | RandomID),
  data = data_melt,
  verbose = 1,
  # bobyqa optimizer with an iteration cap of 30,000 function evaluations
  control = lmerControl(optimizer = "bobyqa",
                        optCtrl = list(maxfun = 30e3))
)

I would like to get the model to converge, however when I don't set maxfun I get:

boundary (singular) fit: see ?isSingular

When I set maxfun to a value it tells me :

maxfun < 10 * length(par)^2 is not recommended.

In both cases I don't get any model.

So I am guessing either I don't understand how to set my fixed and random variables for multivariate testing or I don't understand how to set the maxfun and other optimizer parameters. Or there is something wrong in my dataframe.

Here's part of the dataframe :

# dput() output of the first 50 rows of the melted data frame: one row per
# timestamped observation, with RandomID/Sex/Exposed as factors, Day/Age as
# integers, and a single trait level ("L1_60_f2_6169") shown for this subset.
structure(list(RandomID = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = c("1", 
"2", "3", "4", "5", "6", "7", "8", "9", "10", "11", "12", "13", 
"14", "15", "16"), class = "factor"), Day = c(1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L), Exposed = structure(c(1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L), .Label = "1", class = "factor"), timestamp = c(2L, 17L, 
42L, 67L, 90L, 112L, 129L, 134L, 178L, 199L, 221L, 336L, 352L, 
419L, 450L, 456L, 462L, 476L, 481L, 29L, 50L, 72L, 94L, 116L, 
138L, 160L, 178L, 324L, 347L, 369L, 391L, 399L, 413L, 418L, 1L, 
27L, 85L, 107L, 129L, 150L, 172L, 262L, 289L, 303L, 326L, 371L, 
375L, 88L, 111L, 133L), variable1 = c(59.372, 59.977, 60.466, 
60.906, 61.698, 62.671, 63.242, 63.332, 64.452, 64.55, 64.673, 
66.53, 66.53, 66.53, 66.695, 66.721, 66.723, 66.723, 66.738, 
62.521, 63.546, 63.564, 63.753, 63.828, 64.037, 64.069, 64.069, 
66.202, 66.307, 66.316, 66.341, 66.891, 67.38, 67.819, 57.51, 
61.236, 63.751, 63.875, 63.899, 63.932, 65.81, 67.069, 68.055, 
69.573, 69.983, 70.191, 70.191, 58.2, 62.542, 63.238), variable2 = c(36.062, 
36.243, 36.272, 36.306, 36.31, 51.454, 53.834, 53.855, 55.264, 
55.91, 56.513, 57.074, 57.075, 57.076, 57.359, 57.375, 57.385, 
57.388, 57.388, 38.124, 47.252, 47.28, 47.292, 47.304, 47.426, 
47.461, 47.461, 49.403, 50.46, 50.483, 50.502, 50.502, 50.502, 
50.51, 23.376, 37.496, 44.286, 44.498, 45.09, 45.413, 45.633, 
45.633, 45.667, 45.667, 51.884, 53.045, 53.045, 44.952, 50.338, 
51.322), variable3 = c(1.5492, 1.7853, 1.8325, 1.9413, 1.9166, 
1.2181, 1.2683, 1.4044, 1.2766, 1.0517, 0.9404, 0.86413, 0.86162, 
0.86187, 0.88442, 0.89719, 0.89208, 0.8995, 0.89472, 1.9685, 
1.6237, 1.6467, 1.6246, 1.7229, 1.7692, 1.7483, 1.7461, 1.7623, 
1.73, 1.7317, 1.797, 1.8062, 1.8074, 1.7986, 1.7179, 2.2422, 
2.1168, 1.9199, 1.5924, 1.4847, 1.4686, 1.4847, 1.6072, 1.6003, 
1.5521, 1.5551, 1.5551, 0.74516, 1.5343, 1.6129), variable5 = c(3.0611, 
2.9752, 2.9748, 2.9761, 2.9747, 2.9816, 2.9849, 2.9858, 2.9893, 
2.9903, 2.9914, 2.9949, 2.9949, 2.9949, 2.9951, 2.9954, 2.9954, 
2.9955, 2.9956, 2.9746, 2.9827, 2.9875, 2.9904, 2.9922, 2.9928, 
2.9938, 2.995, 2.9954, 2.9948, 2.9953, 2.9957, 2.9958, 2.9958, 
2.9959, 3.2036, 3.0089, 2.9863, 2.9893, 2.9895, 2.9905, 2.9912, 
2.9912, 2.9926, 2.9931, 3.0028, 2.9987, 2.9987, 3.1721, 2.9983, 
2.9919), variable4 = c(31.727, 35.866, 35.858, 35.593, 35.875, 
39.804, 42.086, 42.055, 43.687, 44.678, 45.65, 45.609, 45.609, 
45.619, 45.829, 45.933, 46.049, 46.038, 46.018, 39.11, 41.452, 
42.212, 42.33, 43.152, 43.593, 44.888, 45.179, 45.408, 45.679, 
46.329, 46.587, 46.627, 46.62, 46.624, 32.531, 38.681, 40.181, 
41.094, 43.249, 43.769, 44.035, 44.029, 43.693, 44.74, 46.067, 
46.611, 46.611, 35.511, 38.619, 40.678), Age = c(44L, 44L, 44L, 
44L, 44L, 44L, 44L, 44L, 44L, 44L, 44L, 44L, 44L, 44L, 44L, 44L, 
44L, 44L, 44L, 44L, 44L, 44L, 44L, 44L, 44L, 44L, 44L, 44L, 44L, 
44L, 44L, 44L, 44L, 44L, 44L, 44L, 44L, 44L, 44L, 44L, 44L, 44L, 
44L, 44L, 44L, 44L, 44L, 44L, 44L, 44L), Sex = structure(c(1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L), .Label = c("0", "1"), class = "factor"), trait = c("L1_60_f2_6169", 
"L1_60_f2_6169", "L1_60_f2_6169", "L1_60_f2_6169", "L1_60_f2_6169", 
"L1_60_f2_6169", "L1_60_f2_6169", "L1_60_f2_6169", "L1_60_f2_6169", 
"L1_60_f2_6169", "L1_60_f2_6169", "L1_60_f2_6169", "L1_60_f2_6169", 
"L1_60_f2_6169", "L1_60_f2_6169", "L1_60_f2_6169", "L1_60_f2_6169", 
"L1_60_f2_6169", "L1_60_f2_6169", "L1_60_f2_6169", "L1_60_f2_6169", 
"L1_60_f2_6169", "L1_60_f2_6169", "L1_60_f2_6169", "L1_60_f2_6169", 
"L1_60_f2_6169", "L1_60_f2_6169", "L1_60_f2_6169", "L1_60_f2_6169", 
"L1_60_f2_6169", "L1_60_f2_6169", "L1_60_f2_6169", "L1_60_f2_6169", 
"L1_60_f2_6169", "L1_60_f2_6169", "L1_60_f2_6169", "L1_60_f2_6169", 
"L1_60_f2_6169", "L1_60_f2_6169", "L1_60_f2_6169", "L1_60_f2_6169", 
"L1_60_f2_6169", "L1_60_f2_6169", "L1_60_f2_6169", "L1_60_f2_6169", 
"L1_60_f2_6169", "L1_60_f2_6169", "L1_60_f2_6169", "L1_60_f2_6169", 
"L1_60_f2_6169"), ResponseValue = c(8.1087, 1.2776, 0.70667, 
1.5856, 1.3246, 12.45, 3.0172, 1.1216, 20.958, 12.753, 11.239, 
3.1055, 8.3986, 2.8289, 3.2187, 3.7314, 5.1144, 1.0522, 1.9444, 
0.7096, 5.9223, 5.3697, 3.2527, 5.1819, 4.8512, 8.6721, 3.0177, 
8.363, 7.9323, 7.1409, 5.5397, 5.1321, 9.1606, 8.4178, 1.6346, 
6.4326, 2.3074, 7.032, 7.6211, 5.0022, 6.5688, 10.239, 1.4024, 
5.633, 8.108, 7.1154, 5.8687, 5.8858, 6.1624, 5.9662)), row.names = c(1L, 
2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L, 
16L, 17L, 18L, 19L, 21L, 22L, 23L, 24L, 25L, 26L, 27L, 28L, 29L, 
30L, 31L, 32L, 33L, 34L, 35L, 36L, 37L, 38L, 39L, 40L, 41L, 42L, 
43L, 44L, 45L, 46L, 47L, 48L, 52L, 53L, 54L), class = "data.frame")

  • can you post your data ? – Mike Aug 16 '19 at 14:36
  • Here's part of the data (can't post the whole thing). [link](https://www.dropbox.com/s/4zjs7tv5k8k7n6d/Example_MultiV_MModel.csv?dl=0) – Vincent Nadon Aug 16 '19 at 15:17
  • I cannot access that data my company blocks dropbox, you can use `dput(head(df,50))` and post the contents of the output into your question so people can at least see the structure. – Mike Aug 16 '19 at 16:36
  • How about with [Googlesheets](https://docs.google.com/spreadsheets/d/1RaOzboaZoEcuHZFU2xjHxoRVqQc8-4_722ehu9IbmFk/edit?usp=drivesdk)? – Vincent Nadon Aug 16 '19 at 17:14
  • can you say a little bit more about your analysis? Do you melt your data into a 7*14000-row data set for modeling purposes? – Ben Bolker Aug 19 '19 at 01:18
  • @BenBolker Exactly, I melted 18 columns into 1 (trait). Then I tried to create a model with variable1 to explain ResponseValue – Vincent Nadon Aug 19 '19 at 12:38
  • @Mike are you able to read the data now? – Vincent Nadon Aug 19 '19 at 12:39

1 Answer

0

This is going to be hard (impossible?) to debug without a reproducible example.

When I run your model with the subset of the data you provided, the model takes about 6 seconds to fit, running 931 iterations. So that doesn't help figure out what's going on ... (do you succeed if you run the model with only this subset?)

A singular fit is not necessarily a mistake: it's a reasonably likely outcome when you're trying to fit a large variance-covariance matrix (see details in ?isSingular)

A couple of small tips: (1) you can set verbose=100 to get a lot of progress information printed as the optimizer is working; (2) using calc.derivs=FALSE in your control settings will turn off post-fitting derivative calculations (which can be slow for large models). Fitting a similar set of simulated data took 432 iterations, about 1 minute on my old MacBook Pro. The same model (without the control/verbose settings) also runs in glmmTMB — it took about twice as long.

Here's my simulated data:

library(lme4)
set.seed(101)
n <- 14000  # number of individuals
nt <- 7     # number of traits
## wide data: one simulated predictor column per trait
dd <- as.data.frame(replicate(nt,rnorm(n)))
names(dd) <- sprintf("trait%d",1:nt)
dd$ID <- 1:n
## melt data to long format, preserve ID
## your "variable" is my "x"
ddm <- tidyr::gather(dd,trait,x,-c(ID))
## parameters (chosen rather arbitrarily):
## beta  = nt fixed-effect slopes;
## theta = lower-triangular Cholesky factor of the among-ID covariance
##         (nt*(nt+1)/2 free elements);
## sigma = residual standard deviation
np <- list(beta=rep(1,nt),
           theta=rep(0.5,nt*(nt+1)/2),
           sigma=1)
## I'm not quite sure how you're getting away without
## this ... (relaxes the n.obs >= n.random-effects check)
options(lmerControl=list(check.nobs.vs.nRE="warning"))
## NOTE: pass the parameter list defined above instead of repeating it
## inline (the original duplicated the same values in newparams=)
ddm$y <- simulate(~trait:x-1 + (trait-1|ID),
                  newdata=ddm,
                  family=gaussian,
                  newparams=np)[[1]]

Now fit:

## Fit the multivariate model to the simulated long-format data.
## verbose=100 prints per-iteration optimizer progress;
## calc.derivs=FALSE skips the slow post-fit derivative (Hessian) check.
m1 <- lmer(
  y ~ trait:x - 1 + (trait - 1 | ID),
  data = ddm,
  control = lmerControl(check.nobs.vs.nRE = "warning",
                        calc.derivs = FALSE),
  verbose = 100
)

This pauses for a few seconds (maybe as much as 10s, on my old Macbook Pro) and then starts printing progress reports of the form

iteration: 164 x = ( 0.851012, 0.376852, 0.339929, 0.364975, 0.324903, 0.357866, 0.375031, 0.981579, 0.520984, 0.558344, 0.493708, 0.531058, 0.572938, 1.030928, 0.639176, 0.651585, 0.589269, 0.619625, 1.003574, 0.599344, 0.576374, 0.540512, 1.033954, 0.576085, 0.585772, 0.999795, 0.563064, 1.035849 )

and produces a reasonably plausible estimate of the variance-covariance matrix:

> VarCorr(m1)
 Groups   Name        Std.Dev. Corr                               
 ID       traittrait1 0.73486                                     
          traittrait2 0.89078  0.379                              
          traittrait3 1.03700  0.300 0.534                        
          traittrait4 1.13949  0.314 0.499 0.642                  
          traittrait5 1.25914  0.271 0.433 0.569 0.702            
          traittrait6 1.32450  0.260 0.426 0.538 0.653 0.749      
          traittrait7 1.43165  0.245 0.395 0.506 0.605 0.700 0.782

The 'true' standard devs:

## Build the 'true' covariance matrix from its Cholesky factor: a
## lower-triangular 7x7 matrix whose nonzero entries are all 0.5
## (matching theta in the simulation above), then report the implied
## among-trait standard deviations.
chol_fac <- matrix(0.5, nrow = 7, ncol = 7)
chol_fac[upper.tri(chol_fac)] <- 0      # zero strictly-above-diagonal entries
V <- tcrossprod(chol_fac)               # V = L %*% t(L)
round(sqrt(diag(V)), 3)
## [1] 0.500 0.707 0.866 1.000 1.118 1.225 1.323

True correlations:

## Rescale the true covariance V to a correlation matrix (unit diagonal);
## the lines below show the strict lower triangle of the result (rows 2-7).
round(cov2cor(V),3)
## 0.707 
## 0.577 0.816 
## 0.500 0.707 0.866 
## 0.447 0.632 0.775 0.894 
## 0.408 0.577 0.707 0.816 0.913
## 0.378 0.535 0.655 0.756 0.845 0.926 

So correlations look a little low (maybe sigma is high in this particular realization?), but not crazy.

Ben Bolker
  • 211,554
  • 25
  • 370
  • 453
  • Thanks @Ben Bolker . I have tried to fit the model on the 5000 observations given as example with the optimizer parameters you specified and I was able to build the model within a few minutes. However, when I build the model on 14000 observations it takes around 9 minutes with one variable. At least now I can build the models and see the output. I'm currently adding X variables to the model to see if it is still able to fit a model. Any suggestions on how to optimize the execution time? – Vincent Nadon Aug 19 '19 at 21:12
  • Do you know a good way to visualize the coefficients and their p-value in a table? I used to do it with `stargazer` ,however it doesn't work with multivariate models? – Vincent Nadon Aug 19 '19 at 21:44
  • You could see if `glmmTMB` is any faster (although in my experiment it was slower). Doug Bates has Julia code that is probably faster, but getting it running could be a big investment (https://github.com/palday/lme4-vs-MixedModels.jl). I would try `broom.mixed::tidy()` for output, maybe in conjunction with `huxtable`; you could take a look at the last section of https://github.com/glmmTMB/glmmTMB/blob/master/glmmTMB/vignettes/model_evaluation.Rnw . Follow-ups to `r-sig-mixed-models@r-project.org`, please ... – Ben Bolker Aug 19 '19 at 22:12
  • I am unable to run your simulated data. I have just sent you an email also because I am unable to make the model converge anymore (with same settings as I used previously). Maybe I used the reduced dataframe instead of the full one previously – Vincent Nadon Aug 21 '19 at 20:09
  • I have just realized that in your simulation you melt the Xs instead of the Ys. I think this is not what I wanted to do. I find your example [here](https://mac-theobio.github.io/QMEE/MultivariateMixed.html) where you melt the Ys into one column corresponds more to what I want to do... Unless I did not understand? Also could you explain what `y ~ trait:x` does? – Vincent Nadon Aug 21 '19 at 21:15
  • If it's computationally feasible, you could use the brms-package to fit a multivariate response model. Tabular output is possible with the sjPlot package, see examples [here (for Bayes multivariate)](https://strengejacke.github.io/sjPlot/articles/tab_bayes.html) or [here (for Frequentist mixed)](https://strengejacke.github.io/sjPlot/articles/tab_mixed.html). – Daniel Aug 22 '19 at 09:23
  • Hi @Daniel , I tried the brms package and was able to obtain a model however I have difficulty to analyse the results. How do I figure out which variables have significant effects? I also tried sjPlot package, with my model `tab_model(m1) ` however I get an error `Error in icc.brmsfit(model) : (list) object cannot be coerced to type 'logical'` – Vincent Nadon Aug 23 '19 at 16:26
  • According to your error, could you try `show.icc = FALSE` in `tab_model()`? Interpreting Bayesian results is a bit different, as you don't have p-values. But you have other indices, like the Bayes factor, or probability of direction, etc. There is a short [JOSS-paper](https://doi.org/10.21105/joss.01541) that shows some of these indices, all of them implemented in the [bayestestR-package](https://easystats.github.io/bayestestR/). – Daniel Aug 23 '19 at 17:32