0

I am trying to simulate data based on two models of linear mixed model. The first model is included random intercept and for fixed there are intercept and slope

library(nlme)
library(mixAK)


 fm1 <- lme(distance ~ age , data = Orthodont, random = ~ 1) 
set.seed(2425)
    simfun <- function(n) {
        # n is the number of subjects, total rows will be 4*n
        # sig.b0 is the st. dev. of the random intercepts
        # it might be easier to just copy from the output
        sig.b0 <- exp(unlist(fm1$modelStruct$reStruct))*fm1$sigma
        b0 <- rnorm(n, 0, sig.b0)

    fe <- fixef(fm1)
    my.df <- data.frame( Subject=rep(1:n, each=4), 
        int = fe[1] + rep(b0, each=4), 
        age=rep( c(8,10,12,14), n ) )
    my.df$distance <- my.df$int + fe[2] * my.df$age + 
       rnorm(n*4, 0, fm1$sigma)

    my.df$int <- NULL
       my.df
}

Orthodont1 <- simfun(100)

the summary for real data:

summary(Orthodont) # original data 
 distance          age          Subject       Sex    
 Min.   :16.50   Min.   : 8.0   M16    : 4   Male  :64  
 1st Qu.:22.00   1st Qu.: 9.5   M05    : 4   Female:44  
 Median :23.75   Median :11.0   M02    : 4              
 Mean   :24.02   Mean   :11.0   M11    : 4              
 3rd Qu.:26.00   3rd Qu.:12.5   M07    : 4              
 Max.   :31.50   Max.   :14.0   M08    : 4              
                                (Other):84  

the summary for first simulated

 summary(Orthodont1) 
      Subject            age          distance    
     Min.   :  1.00   Min.   : 8.0   Min.   :15.26  
     1st Qu.: 25.75   1st Qu.: 9.5   1st Qu.:22.28  
     Median : 50.50   Median :11.0   Median :24.22  
     Mean   : 50.50   Mean   :11.0   Mean   :24.09  
     3rd Qu.: 75.25   3rd Qu.:12.5   3rd Qu.:26.09  
     Max.   :100.00   Max.   :14.0   Max.   :33.22  

I run the first model successfully, but when I run the second model the simulated data is not similar to real data.
Here the second model that I simulated:

fm2 <- lme(distance ~ age , data = Orthodont, random = ~ 1+ age|Subject) 
set.seed(2425)
simfun <- function(n) {
    # n is the number of subjects, total rows will be 4*n

    # sig.b0 is the st. dev. of the random intercepts
    # it might be easier to just copy from the output
    sig.b0 <- getVarCov(fm2)[1:2,1:2]
    b0 <- rMVN(n,c(0, 0), sig.b0)$x

    fe <- fixef(fm2)
    my.df <- data.frame( Subject=rep(1:n, each=4), 
        int = fe[1] + rep(b0[,1], each=4), 
        age=rep( c(8,10,12,14), n ) )
    my.df$distance <- my.df$int + 
                   ( fe[2] + rep(b0[,2], each=4)) * my.df$age + 
       rnorm(n*4, 0, fm2$sigma)

    my.df$int <- NULL
       my.df
}

Orthodont2 <- simfun(100)
> summary(Orthodont)
    distance          age          Subject       Sex    
 Min.   :16.50   Min.   : 8.0   M16    : 4   Male  :64  
 1st Qu.:22.00   1st Qu.: 9.5   M05    : 4   Female:44  
 Median :23.75   Median :11.0   M02    : 4              
 Mean   :24.02   Mean   :11.0   M11    : 4              
 3rd Qu.:26.00   3rd Qu.:12.5   M07    : 4              
 Max.   :31.50   Max.   :14.0   M08    : 4              
                                (Other):84              
> summary(Orthodont2)
    Subject            age          distance       
 Min.   :  1.00   Min.   : 8.0   Min.   :-178.660  
 1st Qu.: 25.75   1st Qu.: 9.5   1st Qu.:  -7.904  
 Median : 50.50   Median :11.0   Median :  20.634  
 Mean   : 50.50   Mean   :11.0   Mean   :  27.006  
 3rd Qu.: 75.25   3rd Qu.:12.5   3rd Qu.:  61.831  
 Max.   :100.00   Max.   :14.0   Max.   : 213.611  

I do not know when the model included intercept and slope for random effects(model 2) the range of simulated data is different from real data.

May give me any suggestion ?

alexwhitworth
  • 4,839
  • 5
  • 32
  • 59
R. Saeiti
  • 89
  • 3
  • 11

0 Answers0