
I'm trying to translate code from deepnet to mxnet, but I'm not sure what I'm doing wrong. I'm getting an error message that says:

"Error in nn$W[[i -1]] %*% t(post)". 
requires numeric/complex matrix/vector arguments 
Calls: neural.predict -> nn.predict -> t

The code using deepnet (written by Johann C. Lotter) is:

library('deepnet', quietly = T) 
library('caret', quietly = T)

# Train a stacked-autoencoder DNN with deepnet; the last column of XY is
# the target, binarized to 0/1, and the model is stored in the global list.
neural.train = function(model,XY) 
{
  XY <- as.matrix(XY)
  X <- XY[,-ncol(XY)]
  Y <- XY[,ncol(XY)]
  Y <- ifelse(Y > 0,1,0)
  Models[[model]] <<- sae.dnn.train(X,Y,
      hidden = c(30,30,30), 
      activationfun = "tanh", 
      learningrate = 0.5, 
      momentum = 0.5, 
      learningrate_scale = 1.0, 
      output = "sigm", 
      sae_output = "linear", 
      numepochs = 100, 
      batchsize = 100,
      hidden_dropout = 0, 
      visible_dropout = 0)
}

# Predict with deepnet's nn.predict; a plain vector is first transposed
# into a one-row matrix.
neural.predict = function(model,X) 
{
  if(is.vector(X)) X <- t(X)
  return(nn.predict(Models[[model]],X))
}

# Save the list of trained models to disk.
neural.save = function(name)
{
  save(Models,file=name)  
}

# Initialize the global model list and fix the RNG seed for reproducibility.
neural.init = function()
{
  set.seed(365)
  Models <<- vector("list")
}

For the mxnet translation, I'm replacing neural.train with:

library('mxnet', quietly = T) 

neural.train = function(model,XY) 
{
  XY <- as.matrix(XY)
  X <- XY[,-ncol(XY)]
  Y <- XY[,ncol(XY)]
  Y <- ifelse(Y > 0,1,0)
  Models[[model]] <<- mx.mlp(X,Y,
      hidden_node = c(30,30,30), 
      activation = "relu", 
      momentum = 0.9, 
      learning.rate = 0.07, 
      out_activation = "softmax",
      num_round = 100,
      out_node = 2,
      array.batch.size = 100)
}

I don't get what I'm doing wrong.

PandemicCode

1 Answer


Please find the working code below. If for some reason it doesn't work on your machine, check the version of mxnet you have; I am running it on a Mac with mxnet version 0.10.1. The error you see comes from neural.predict still calling deepnet's nn.predict on the mxnet model; the version below uses mxnet's predict instead.

Since you said you want to keep the code as close as possible to the original example, I have set the attribute values back to the original ones. Feel free to change them if you need to. For example, a momentum of 0.5 seems too low: usually a value of 0.9 or higher is used. Likewise, a learning rate of 0.5 is too high: usually the learning rate is not more than 0.1.
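For illustration only, a variant of the training function with such values might look like the sketch below. The name neural.train.tuned and the exact numbers (momentum 0.9, learning rate 0.05) are my own placeholders, not values tuned for this data:

library('mxnet')

# Hypothetical variant of neural.train with more conventional hyperparameters:
# momentum raised to 0.9 and learning rate lowered to 0.05 (illustrative only).
neural.train.tuned = function(model, XY)
{
  XY <- as.matrix(XY)
  X <- XY[,-ncol(XY)]
  Y <- ifelse(XY[,ncol(XY)] > 0, 1, 0)
  Models[[model]] <<- mx.mlp(X, Y,
                             hidden_node = c(30,30,30),
                             activation = "tanh",
                             momentum = 0.9,        # usually 0.9 or higher
                             learning.rate = 0.05,  # usually at most 0.1
                             out_activation = "softmax",
                             num.round = 100,
                             out_node = 2,
                             array.batch.size = 100,
                             dropout = 0,
                             array.layout = "rowmajor")
}

The faithful translation, with the original values, is: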

library('mxnet') 

# Same interface as the deepnet version, but trained with mx.mlp. Note the
# mxnet argument names: num.round (with a dot, not num_round), and
# array.layout = "rowmajor" because each row of X is one sample.
neural.train = function(model,XY) 
{
  XY <- as.matrix(XY)
  X <- XY[,-ncol(XY)]
  Y <- XY[,ncol(XY)]
  Y <- ifelse(Y > 0,1,0)
  Models[[model]] <<- mx.mlp(X,Y,
                             hidden_node = c(30,30,30), 
                             activation = "tanh", 
                             momentum = 0.5, 
                             learning.rate = 0.5, 
                             out_activation = "softmax",
                             num.round = 100,
                             out_node = 2,
                             array.batch.size = 100,
                             dropout = 0,
                             array.layout = "rowmajor")
}

# The crucial fix: use mxnet's predict() instead of deepnet's nn.predict(),
# again telling mxnet that samples are laid out row-wise.
neural.predict = function(model,X) 
{
  if(is.vector(X)) X <- t(X)
  return(predict(Models[[model]], X, array.layout = "rowmajor"))
}

neural.save = function(name)
{
  save(Models,file=name)  
}

neural.init = function()
{
  set.seed(365)
  Models <<- vector("list")
}

# Synthetic training data: two Gaussian clusters in Var1/Var2, plus a
# random 0/1 column (Var3) that serves as the target.
Var1 <- c(rnorm(50, 1, 0.5), rnorm(50, -0.6, 0.2))
Var2 <- c(rnorm(50, -0.8, 0.2), rnorm(50, 2, 1))
Var3 <- sample(c(0,1), replace=T, size=100)
training.data <- matrix(c(Var1, Var2, Var3), nrow = 100, ncol = 3)

# Test data drawn from the same feature distribution, without a target column.
Var4 <- c(rnorm(50, 1, 0.5), rnorm(50, -0.6, 0.2))
Var5 <- c(rnorm(50, -0.8, 0.2), rnorm(50, 2, 1))
test.data <- matrix(c(Var4, Var5), nrow = 100, ncol = 2)


neural.init()
neural.train("mx_mlp_model", training.data)
neural.predict("mx_mlp_model", test.data)

After executing this, I get the following output. The probabilities are nearly constant across samples, which is expected here, because the random Var3 labels carry no signal for the network to learn:

> neural.predict("mx_mlp_model", test.data)
     [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10] [,11] [,12] [,13] [,14] [,15] [,16] [,17] [,18] [,19] [,20]
[1,] 0.47 0.47 0.47 0.47 0.47 0.47 0.47 0.47 0.47  0.47  0.47  0.47  0.47  0.47  0.47  0.47  0.47  0.47  0.47  0.47
[2,] 0.53 0.53 0.53 0.53 0.53 0.53 0.53 0.53 0.53  0.53  0.53  0.53  0.53  0.53  0.53  0.53  0.53  0.53  0.53  0.53
     [,21] [,22] [,23] [,24] [,25] [,26] [,27] [,28] [,29] [,30] [,31] [,32] [,33] [,34] [,35] [,36] [,37] [,38] [,39]
[1,]  0.47  0.47  0.47  0.47  0.47  0.47  0.47  0.47  0.47  0.47  0.47  0.47  0.47  0.47  0.47  0.47  0.47  0.47  0.47
[2,]  0.53  0.53  0.53  0.53  0.53  0.53  0.53  0.53  0.53  0.53  0.53  0.53  0.53  0.53  0.53  0.53  0.53  0.53  0.53
     [,40] [,41] [,42] [,43] [,44] [,45] [,46] [,47] [,48] [,49] [,50] [,51] [,52] [,53] [,54] [,55] [,56] [,57] [,58]
[1,]  0.47  0.47  0.47  0.47  0.47  0.47  0.47  0.47  0.47  0.47  0.47  0.47  0.47  0.47  0.47  0.47  0.47  0.47  0.47
[2,]  0.53  0.53  0.53  0.53  0.53  0.53  0.53  0.53  0.53  0.53  0.53  0.53  0.53  0.53  0.53  0.53  0.53  0.53  0.53
     [,59] [,60] [,61] [,62] [,63] [,64] [,65] [,66] [,67] [,68] [,69] [,70] [,71] [,72] [,73] [,74] [,75] [,76] [,77]
[1,]  0.47  0.47  0.47  0.47  0.47  0.47  0.47  0.47  0.47  0.47  0.47  0.47  0.47  0.47  0.47  0.47  0.47  0.47  0.47
[2,]  0.53  0.53  0.53  0.53  0.53  0.53  0.53  0.53  0.53  0.53  0.53  0.53  0.53  0.53  0.53  0.53  0.53  0.53  0.53
     [,78] [,79] [,80] [,81] [,82] [,83] [,84] [,85] [,86] [,87] [,88] [,89] [,90] [,91] [,92] [,93] [,94] [,95] [,96]
[1,]  0.47  0.47  0.47  0.47  0.47  0.47  0.47  0.47  0.47  0.47  0.47  0.47  0.47  0.47  0.47  0.47  0.47  0.47  0.47
[2,]  0.53  0.53  0.53  0.53  0.53  0.53  0.53  0.53  0.53  0.53  0.53  0.53  0.53  0.53  0.53  0.53  0.53  0.53  0.53
     [,97] [,98] [,99] [,100]
[1,]  0.47  0.47  0.47   0.47
[2,]  0.53  0.53  0.53   0.53
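Note the shape of this output: with out_activation = "softmax" and out_node = 2, predict returns a 2 x 100 matrix of class probabilities, one column per test sample. If you need hard 0/1 labels instead, you can take the column-wise argmax, for example with base R's max.col (a small sketch, reusing the model trained above):

# Turn the 2 x n probability matrix into 0/1 labels: for each column,
# pick the row with the larger probability (row 1 -> 0, row 2 -> 1).
pred <- neural.predict("mx_mlp_model", test.data)
labels <- max.col(t(pred)) - 1
head(labels)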

Hope it helps.

Sergei
  • Great answer! Are there other values you would suggest changing, besides momentum and learning rate? I don't need to keep the exact values; if there are better choices I'll be glad to know – PandemicCode Mar 11 '18 at 09:35
  • Finding the best values of the hyperparameters (learning rate, momentum and everything else that could be there) is a very active research topic. Unfortunately there is no general answer; in each particular situation the best parameters are found by trial and error. There are, however, a few general methods for structuring the search. I recommend starting with https://en.wikipedia.org/wiki/Hyperparameter_optimization – Sergei Mar 12 '18 at 16:57
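As a minimal illustration of that trial-and-error approach, a naive grid search over learning rate and momentum could look like the sketch below. It reuses training.data and the mx.mlp call from the answer, and scores each candidate by accuracy on the training set itself, which is a crude proxy (a proper setup would hold out a validation split); the grid values are arbitrary examples:

# Naive grid search over (learning rate, momentum). Each candidate model is
# scored by accuracy on the training data itself; crude, but it shows the idea.
grid <- expand.grid(lr = c(0.01, 0.05, 0.1), mom = c(0.5, 0.9))
X <- training.data[,-ncol(training.data)]
Y <- ifelse(training.data[,ncol(training.data)] > 0, 1, 0)

scores <- apply(grid, 1, function(p) {
  model <- mx.mlp(X, Y,
                  hidden_node = c(30,30,30),
                  activation = "tanh",
                  momentum = p["mom"],
                  learning.rate = p["lr"],
                  out_activation = "softmax",
                  num.round = 100,
                  out_node = 2,
                  array.batch.size = 100,
                  dropout = 0,
                  array.layout = "rowmajor")
  pred <- predict(model, X, array.layout = "rowmajor")
  mean((max.col(t(pred)) - 1) == Y)   # training accuracy
})

grid[which.max(scores), ]   # best (lr, mom) pair under this crude score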