0

I'd like to fit a model using the mnlogit package and use it to make out-of-sample predictions. I've set up a toy example using the fishing data that comes with mnlogit:

library(data.table)
library(mnlogit)

# Load the example fishing data shipped with mnlogit and convert it to a
# data.table so that keyed joins can be used for the train/test split.
data(Fish, package = "mnlogit")
fish_dt <- data.table(Fish)
rm(Fish)

# Randomly assign half of the choice situations (chid) to the training set.
unique_id <- unique(fish_dt[, chid])
set.seed(54321)
train_id <- sample(unique_id, size = 0.5 * length(unique_id))

# Joining on the first key column (chid) keeps all alternatives of a sampled
# choice situation together; the not-join (!) selects the remaining ones.
setkey(fish_dt, chid, alt)
train <- fish_dt[J(train_id)]
test <- fish_dt[!J(train_id)]
setkey(train, chid, alt)
setkey(test, chid, alt)
stopifnot(nrow(train) + nrow(test) == nrow(fish_dt))  # Partition fish_dt

mnlogit_formula <- mode ~ catch | income
mnlogit_model <- mnlogit(mnlogit_formula, data = train, choiceVar = "alt")

# Spell out FALSE: the shorthand F is an ordinary variable that can be
# reassigned, which would silently change the meaning of the call.
train_predictions <- predict(mnlogit_model, probability = FALSE)
stopifnot(length(train_predictions) == length(unique(train[, chid])))  # One per choice
mean(subset(train, mode)[, alt] == train_predictions)  # Around 0.42 accuracy in sample

## Would like to do the same out of sample, i.e. with data table "test"
test_predictions <- predict(mnlogit_model, newdata = test,
                            probability = FALSE)  # Error
test_predictions <- predict(mnlogit_model, newdata = as.data.frame(test),
                            probability = FALSE)  # Same error

The error I get is:

Error in `colnames<-`(`*tmp*`, value = list(chid = c(1L, 2L, 3L, 4L, 5L, : length of 'dimnames' [2] not equal to array extent

I'm running R version 3.0.2 (2013-09-25) on Ubuntu 14.04.2 LTS.

Am I using the package incorrectly or is this a bug?

Edit: See comments: I tried removing the "mode" column from the "test" data table, but that gives me a "newdata must have same columns as training data" error:

# Drop the response column from the test set; `:= NULL` removes the column
# from the data.table by reference.
test[, mode := NULL]
mnlogit_predictions <- predict(mnlogit_model, newdata = test,
                               probability = FALSE)  # Error

Edit: Here's an example where I use the mlogit package (which is similar but can be significantly slower for large problems):

library(data.table)
library(mlogit)

# Load the Fish example data (shipped with mnlogit) as a data.table.
data(Fish, package="mnlogit")
fish_dt <- data.table(Fish)
rm(Fish)

# Randomly pick half of the choice-situation ids (chid) for training;
# the fixed seed makes the split reproducible.
unique_id <- unique(fish_dt[, chid])
set.seed(54321)
train_id <- sample(unique_id, size=0.5*length(unique_id))

# Split by joining on the first key column (chid): the join selects the
# sampled ids, the not-join (!) selects everything else.
setkey(fish_dt, chid, alt)
train <- fish_dt[J(train_id)]
test <- fish_dt[!J(train_id)]
setkey(train, chid, alt)
setkey(test, chid, alt)
stopifnot(nrow(train) + nrow(test) == nrow(fish_dt))  # Partition fish_dt

# Wrap both splits as long-format mlogit data, naming the choice indicator,
# the choice-situation id and the alternative columns explicitly.
train_mlogit <- mlogit.data(train, choice="mode", shape="long",
                            chid.var="chid", alt.var="alt")
test_mlogit <- mlogit.data(test, choice="mode", shape="long",
                           chid.var="chid", alt.var="alt")

# Fit the model on the training split only.
model_formula <- mode ~ catch | income
mlogit_model <- mlogit(model_formula, data=train_mlogit)

## In-sample performance
# predict() yields one row per choice situation (checked below) with one
# column per alternative -- presumably predicted probabilities; the
# predicted alternative is the column with the largest value in each row.
train_predictions <- predict(mlogit_model, newdata=train_mlogit)
stopifnot(nrow(train_predictions) == length(unique(train[, chid])))  # One per choice
train_predictions <- colnames(train_predictions)[apply(train_predictions, 1, which.max)]
# subset(train, mode) keeps the rows where mode is TRUE, so alt there is the
# alternative actually chosen; the mean of the matches is the accuracy.
mean(subset(train, mode)[, alt] == train_predictions)  # Around 0.42 accuracy in sample

## Out-of-sample performance
# Same procedure as above, applied to the held-out split.
test_predictions <- predict(mlogit_model, newdata=test_mlogit)
test_predictions <- colnames(test_predictions)[apply(test_predictions, 1, which.max)]
mean(subset(test, mode)[, alt] == test_predictions)  # Around 0.41 accuracy out of sample

I'd like to do exactly that, but with mnlogit instead of mlogit.

Adrian
  • 3,138
  • 2
  • 28
  • 39
  • You can check the [github](https://github.com/cran/mnlogit) page for the package. One possible error may be defining the response variable (for test dataset) in your code. – S Das Apr 08 '15 at 21:14
  • @SubasishDas You mean the "mode" column in the "test" data frame? If I remove it I get a "must have same columns as training data" error. – Adrian Apr 10 '15 at 09:34

1 Answer

0

Predict works well with mnlogit objects. However, your 'test' object is a data table and not an mlogit.data object, so you need to pass choiceVar in the predict call as well.

# newdata is a plain data.table/data.frame rather than an mlogit.data object,
# so the column holding the alternatives must be named via choiceVar.
test_predictions <- predict(mnlogit_model, newdata = test,
                            choiceVar = "alt")  # works
# FALSE is spelled out because the shorthand F can be reassigned.
test_predictions <- predict(mnlogit_model, newdata = as.data.frame(test),
                            probability = FALSE,
                            choiceVar = "alt")  # this also works

Thanks

user2778822
  • 71
  • 2
  • 11