3

I am trying to train a simple MLP model that maps input questions (using a 300D word embedding) and image features extracted using a pretrained VGG16 model to a feature vector of fixed length. However, I can't figure out how to fix the error mentioned below. Here is the code I'm trying to run at the moment:

parser = argparse.ArgumentParser()
parser.add_argument('-num_hidden_units', type=int, default=1024)
parser.add_argument('-num_hidden_layers', type=int, default=3)
parser.add_argument('-dropout', type=float, default=0.5)
parser.add_argument('-activation', type=str, default='tanh')
parser.add_argument('-language_only', type=bool, default= False)
parser.add_argument('-num_epochs', type=int, default=10) #default=100
parser.add_argument('-model_save_interval', type=int, default=10)
parser.add_argument('-batch_size', type=int, default=128)
args = parser.parse_args()

questions_train = open('data/qa/preprocess/questions_train2014.txt', 'r').read().splitlines()
answers_train = open('data/qa/preprocess/answers_train2014_modal.txt', 'r').read().splitlines()
images_train = open('data/qa/preprocess/images_train2014.txt', 'r').read().splitlines()
vgg_model_path = 'data/coco/vgg_feats.mat'
maxAnswers = 1000
questions_train, answers_train, images_train = selectFrequentAnswers(questions_train,answers_train,images_train, maxAnswers)

#encode the remaining answers
labelencoder = preprocessing.LabelEncoder()
labelencoder.fit(answers_train)
nb_classes = len(list(labelencoder.classes_))
joblib.dump(labelencoder,'models/labelencoder.pkl')

features_struct = scipy.io.loadmat(vgg_model_path)
VGGfeatures = features_struct['feats']
print ('loaded vgg features')
image_ids = open('data/coco/coco_vgg_IDMap.txt').read().splitlines()
id_map = {}
for ids in image_ids:
    id_split = ids.split()
    id_map[id_split[0]] = int(id_split[1])

nlp = English()
print ('loaded word2vec features...')
img_dim = 4096
word_vec_dim = 300

model = Sequential()
if args.language_only:
    model.add(Dense(args.num_hidden_units, input_dim=word_vec_dim, init='uniform'))
else:
    model.add(Dense(args.num_hidden_units, input_dim=img_dim+word_vec_dim, init='uniform'))
model.add(Activation(args.activation))
if args.dropout>0:
    model.add(Dropout(args.dropout))
for i in range(args.num_hidden_layers-1):
    model.add(Dense(args.num_hidden_units, init='uniform'))
    model.add(Activation(args.activation))
    if args.dropout>0:
        model.add(Dropout(args.dropout))
model.add(Dense(nb_classes, init='uniform'))
model.add(Activation('softmax'))

json_string = model.to_json()
if args.language_only:
    model_file_name = 'models/mlp_language_only_num_hidden_units_' + str(args.num_hidden_units) + '_num_hidden_layers_' + str(args.num_hidden_layers)
else:
    model_file_name = 'models/mlp_num_hidden_units_' + str(args.num_hidden_units) + '_num_hidden_layers_' + str(args.num_hidden_layers)     
open(model_file_name  + '.json', 'w').write(json_string)

print ('Compiling model...')
model.compile(loss='categorical_crossentropy', optimizer='rmsprop')
print ('Compilation done...')

print ('Training started...')
for k in range(args.num_epochs):
    #shuffle the data points before going through them
    index_shuf = list(range(len(questions_train)))
    shuffle(index_shuf)
    questions_train = [questions_train[i] for i in index_shuf]
    answers_train = [answers_train[i] for i in index_shuf]
    images_train = [images_train[i] for i in index_shuf]
    progbar = generic_utils.Progbar(len(questions_train))
    for qu_batch,an_batch,im_batch in zip(grouper(questions_train, args.batch_size, fillvalue=questions_train[-1]), 
                                        grouper(answers_train, args.batch_size, fillvalue=answers_train[-1]), 
                                        grouper(images_train, args.batch_size, fillvalue=images_train[-1])):
        X_q_batch = get_questions_matrix_sum(qu_batch, nlp)
        if args.language_only:
            X_batch = X_q_batch
        else:
            X_i_batch = get_images_matrix(im_batch, id_map, VGGfeatures)
            X_batch = np.hstack((X_q_batch, X_i_batch))
        Y_batch = get_answers_matrix(an_batch, labelencoder)
        loss = model.train_on_batch(X_batch, Y_batch)            
        progbar.add(args.batch_size, values=[("train loss", loss)])
    #print type(loss)
    if k%args.model_save_interval == 0:
        model.save_weights(model_file_name + '_epoch_{:02d}.hdf5'.format(k))

model.save_weights(model_file_name + '_epoch_{:02d}.hdf5'.format(k))

And here is the error I get:

Keras: Error when checking input: expected dense_9_input to have shape (4396,) but got array with shape (4096,)

today
  • 32,602
  • 8
  • 95
  • 115
Vanessa.C
  • 45
  • 1
  • 5

1 Answers1

2

I think that the error lies in what you pass in the else statement in the first layer of your model versus what you pass in training. In your first layer you specify:

model = Sequential()
if args.language_only:
    model.add(Dense(args.num_hidden_units, input_dim=word_vec_dim, init='uniform'))
else:
    model.add(Dense(args.num_hidden_units, input_dim=img_dim+word_vec_dim, init='uniform'))

You clearly pass input_dim = img_dim + word_vec_dim = 4096 + 300 = 4396. During training you pass:

X_q_batch = get_questions_matrix_sum(qu_batch, nlp)
if args.language_only:
    X_batch = X_q_batch
else:
    X_i_batch = get_images_matrix(im_batch, id_map, VGGfeatures)
    X_batch = np.hstack((X_q_batch, X_i_batch))

So, in the else branch, X_batch will have X_q_batch or X_i_batch rows, which apparently = 4096. By the way, for debugging purposes, it would be easier to give your layers a name, e.g.

x = Dense(64, activation='relu', name="dense_one")

I hope this helps.

mgross
  • 550
  • 1
  • 7
  • 24