2

I am generating multinom models using nnet, with a model fitted for each city in the dataset. When I attempt to use tidy with these models, I get the following error:

Error in probs[i, -1, drop = FALSE] : subscript out of bounds

However, if I produce a model for each City separately, and then use tidy I do not receive an error for any of the models. I am also able to use glace without an error.

What might be causing this error?

library(broom)
library(dplyr)
library(nnet)

dfstack <- structure(list(Var1 = c(73L, 71L, 66L, 75L, 96L, 98L, 98L, 65L, 
75L, 74L, 71L, 98L, 100L, 87L, 78L, 50L, 73L, 82L, 70L, 70L, 
31L, 34L, 32L, 100L, 100L, 100L, 54L, 51L, 36L, 48L, 66L, 60L, 
59L, 72L, 76L, 90L, 85L, 76L, 55L, 53L, 42L, 54L, 54L, 10L, 34L, 
18L, 6L, 16L, 63L, 41L, 68L, 55L, 52L, 57L, 64L, 61L, 68L, 44L, 
33L, 19L, 38L, 54L, 44L, 87L, 100L, 100L, 63L, 75L, 76L, 100L, 
100L, 64L, 95L, 90L, 99L, 98L, 87L, 62L, 62L, 88L, 79L, 85L), 
    Status = structure(c(2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
    2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
    2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
    2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
    1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 3L, 3L, 3L, 3L, 3L, 3L, 
    3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L), .Label = c("A", 
    "B", "C"), class = "factor"), City = structure(c(3L, 3L, 
    3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 
    3L, 3L, 3L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 
    1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 3L, 3L, 3L, 3L, 
    3L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 
    1L, 3L, 3L, 3L, 3L, 3L, 3L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 
    1L, 1L, 1L, 1L, 1L), .Label = c("Denver", "Miami", "NYC"), class = "factor"), 
    ID = structure(c(52L, 63L, 74L, 77L, 78L, 79L, 80L, 81L, 
    82L, 53L, 54L, 55L, 56L, 57L, 58L, 59L, 60L, 61L, 62L, 64L, 
    31L, 42L, 45L, 46L, 47L, 48L, 49L, 50L, 51L, 32L, 1L, 12L, 
    23L, 25L, 26L, 27L, 28L, 29L, 30L, 2L, 3L, 4L, 5L, 65L, 66L, 
    67L, 68L, 69L, 33L, 34L, 35L, 36L, 37L, 38L, 39L, 40L, 6L, 
    7L, 8L, 9L, 10L, 11L, 13L, 70L, 71L, 72L, 73L, 75L, 76L, 
    41L, 43L, 44L, 14L, 15L, 16L, 17L, 18L, 19L, 20L, 21L, 22L, 
    24L), .Label = c("Denver1", "Denver10", "Denver11", "Denver12", 
    "Denver13", "Denver14", "Denver15", "Denver16", "Denver17", 
    "Denver18", "Denver19", "Denver2", "Denver20", "Denver21", 
    "Denver22", "Denver23", "Denver24", "Denver25", "Denver26", 
    "Denver27", "Denver28", "Denver29", "Denver3", "Denver30", 
    "Denver4", "Denver5", "Denver6", "Denver7", "Denver8", "Denver9", 
    "Miami1", "Miami10", "Miami11", "Miami12", "Miami13", "Miami14", 
    "Miami15", "Miami16", "Miami17", "Miami18", "Miami19", "Miami2", 
    "Miami20", "Miami21", "Miami3", "Miami4", "Miami5", "Miami6", 
    "Miami7", "Miami8", "Miami9", "NYC1", "NYC10", "NYC11", "NYC12", 
    "NYC13", "NYC14", "NYC15", "NYC16", "NYC17", "NYC18", "NYC19", 
    "NYC2", "NYC20", "NYC21", "NYC22", "NYC23", "NYC24", "NYC25", 
    "NYC26", "NYC27", "NYC28", "NYC29", "NYC3", "NYC30", "NYC31", 
    "NYC4", "NYC5", "NYC6", "NYC7", "NYC8", "NYC9"), class = "factor")), class = "data.frame", row.names = c(NA, -82L), .Names = c("Var1", "Status", "City", "ID"))

Model.List <- dfstack %>% group_by(City) %>% do(modfits = multinom(Status~Var1, data=.))
tidy(Model.List, modfits) # produces error
glance(Model.List, modfits) # no error

# no error when each city on its own
df1 <- dfstack %>% filter(City == "NYC") %>% do(modfit1 = multinom(Status~Var1, data=.))
tidy(df1, modfit1)

df2 <- dfstack %>% filter(City == "Miami") %>% do(modfit1 = multinom(Status~Var1, data=.))
tidy(df2, modfit1)

df3 <- dfstack %>% filter(City == "Denver") %>% do(modfit1 = multinom(Status~Var1, data=.))
tidy(df3, modfit1)
nofunsally
  • 2,051
  • 6
  • 35
  • 53
  • I just ran it in the r console, and with RStudio and get the same error in either – nofunsally Dec 16 '16 at 17:05
  • I can tell you for sure that this is specific to `multinom`. Not sure if it may be related to the fact that your 3rd multinom model didn't converge. – Hack-R Dec 16 '16 at 17:53
  • Yeah, I tried it with lm(Var~Status) and it works just fine. Also `summary(Model.List$modfits[[1]])` produces the error so I don't think it has anything to do with tidy directly. – Bishops_Guest Dec 16 '16 at 17:59
  • `Model.List <- dfstack %>% group_by(City) %>% do(modfits = multinom(Status~Var1, maxit = 200, data=.))` Increasing the iterations results in convergence, but still produces the error. – nofunsally Dec 16 '16 at 19:30

1 Answers1

3

Don't ask me to explain why, but I figured it out.

tidy.multinom calls summary.multinom which calls vcov.multinom which calls multinomHess. The error was being generated down in multinomHess, which is only run when the Hessian matrix is not generated in the original call to multinom. That is to say, you don't necessarily need to spend the time calculating the Hessian matrix if you don't intend to use the summary object.

For some reason, when the multinom objects are formed within the do call, summary.multinom is unable to calculate the Hessian matrix. This can be circumvented by calling multinom with Hess = TRUE. See below:

Model.List <- 
  dfstack %>% 
  group_by(City) %>% 
  do(modfits = multinom(Status~Var1, 
                        data=., 
                        Hess = TRUE))
tidy(Model.List, modfits) 
glance(Model.List, modfits) 

In your original code, glance did not cast an error because glance.multinom does not rely on summary.multinom.

Benjamin
  • 16,897
  • 6
  • 45
  • 65
  • This was driving me nuts and I don't know if I ever would have figured it out on my own. Thanks! – David C Apr 16 '20 at 15:23