I am trying to train a 3D convnet on 3D spatial data using MXNet. When I run the program, the loss decreases normally over the epochs, but the training and test accuracies stay exactly the same. I am very new to neural networks and I have no idea why this is happening. I am not sure whether my network parameters are bad, my data isn't preprocessed properly, the training code has a bug, or it is something else entirely.
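Since the accuracies never move at all, my first guess is that the network might be collapsing to a single class. This is the quick check I would run on the trained net (just a sketch; it assumes net, test_data, ctx, and nd from the code below):

from collections import Counter

# Tally the predicted class for every test sample; if one key holds the
# full count, the net is predicting the same class for everything.
pred_counts = Counter()
for data, label in test_data:
    preds = nd.argmax(net(data.as_in_context(ctx)), axis=1)
    pred_counts.update(preds.asnumpy().astype(int).tolist())
print(pred_counts)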
Here is the code I am currently using for the convnet and the output it produces:
import multiprocessing

import mxnet as mx
from mxnet import autograd, gluon, nd

# ctx and cpucount are set earlier in my script; shown here for completeness.
ctx = mx.cpu()
cpucount = multiprocessing.cpu_count()

batch_size = 64
num_inputs = 2541
num_outputs = 2
num_fc = 635

# train_dataset / test_dataset are built earlier from my 3D arrays.
train_data = mx.gluon.data.DataLoader(train_dataset, batch_size=batch_size,
                                      shuffle=True, num_workers=cpucount)
test_data = mx.gluon.data.DataLoader(test_dataset, batch_size=batch_size,
                                     shuffle=True, num_workers=cpucount)

net = gluon.nn.Sequential()
with net.name_scope():
    # Two 3D conv/pool stages, then a small fully connected classifier.
    net.add(gluon.nn.Conv3D(channels=1, kernel_size=3, activation='relu'))
    net.add(gluon.nn.MaxPool3D(pool_size=2, strides=2))
    net.add(gluon.nn.Conv3D(channels=1, kernel_size=3, activation='relu'))
    net.add(gluon.nn.MaxPool3D(pool_size=2, strides=2))
    net.add(gluon.nn.Flatten())
    net.add(gluon.nn.Dense(num_fc, activation="relu"))
    net.add(gluon.nn.Dense(num_outputs))  # two classes, raw logits

net.collect_params().initialize(mx.init.Xavier(magnitude=2.24), ctx=ctx)
softmax_cross_entropy = gluon.loss.SoftmaxCrossEntropyLoss()
trainer = gluon.Trainer(net.collect_params(), 'sgd', {'learning_rate': .0001})
def evaluate_accuracy(data_iterator, net):
    acc = mx.metric.Accuracy()
    for data, label in data_iterator:
        data = data.as_in_context(ctx)
        label = label.as_in_context(ctx)
        output = net(data)
        predictions = nd.argmax(output, axis=1)
        label = label.reshape(len(label))  # flatten labels to match predictions
        acc.update(preds=predictions, labels=label)
    return acc.get()[1]
epochs = 100
smoothing_constant = .01
for e in range(epochs):
    for i, (data, label) in enumerate(train_data):
        data = data.as_in_context(ctx)
        label = label.as_in_context(ctx)
        with autograd.record():
            output = net(data)
            loss = softmax_cross_entropy(output, label)
        loss.backward()
        trainer.step(data.shape[0])

        # Exponentially smoothed training loss, used only for reporting.
        curr_loss = nd.mean(loss).asscalar()
        moving_loss = (curr_loss if ((i == 0) and (e == 0))
                       else (1 - smoothing_constant) * moving_loss
                            + smoothing_constant * curr_loss)

    test_accuracy = evaluate_accuracy(test_data, net)
    train_accuracy = evaluate_accuracy(train_data, net)
    print("Epoch %s. Loss: %s, Train_acc %s, Test_acc %s"
          % (e, moving_loss, train_accuracy, test_accuracy))
output:

Epoch 0. Loss: 26525280.32107588, Train_acc 0.462039045553, Test_acc 0.386554621849
Epoch 1. Loss: 17045452.882872812, Train_acc 0.462039045553, Test_acc 0.386554621849
Epoch 2. Loss: 10953605.785322478, Train_acc 0.462039045553, Test_acc 0.386554621849
Epoch 3. Loss: 7038914.162310514, Train_acc 0.462039045553, Test_acc 0.386554621849
Epoch 4. Loss: 4523287.90677917, Train_acc 0.462039045553, Test_acc 0.386554621849
Epoch 5. Loss: 2906717.2884657932, Train_acc 0.462039045553, Test_acc 0.386554621849
Epoch 6. Loss: 1867890.253548351, Train_acc 0.462039045553, Test_acc 0.386554621849
(I omitted the rest of the epochs, but even when the loss was around 0.09 the accuracies were still exactly the same.)
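One thing I notice is that the initial loss is in the millions, which makes me suspect my inputs are on a very large scale. If that could matter, this is roughly the normalization I would try before building the DataLoaders (a sketch; train_volumes and test_volumes are hypothetical NumPy arrays holding my raw 3D data, not names from my current pipeline):

import numpy as np

# Standardize with statistics computed on the training set only, and
# apply the same statistics to the test set (assumed preprocessing step).
train_mean = train_volumes.mean()
train_std = train_volumes.std()
train_volumes = (train_volumes - train_mean) / (train_std + 1e-8)
test_volumes = (test_volumes - train_mean) / (train_std + 1e-8)

Would something like this plausibly explain the frozen accuracy, or should I be looking at the network or the training loop instead?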