Training data worse than testing data

Question

I am trying to set up a neural network using Gluon and mxnet to implement the fizzbuzz program. However, it is giving me a weird result; the accuracy of the training data(35-42%) is significantly worse than the accuracy of the testing data(97-99%). I have used the numbers 101 through 1024 as the training dataset and the numbers 1 through 100 as the testing dataset.

So, why is the accuracy of the training data worse than that of the testing data? Isn't it supposed to be the other way round?

My code:

import numpy as np
from mxnet import gluon, nd, autograd
import mxnet as mx

ctx = mx.cpu()
mx.random.seed(1)

def binary_encode(i, digits):
    return np.array([i>>d&1 for d in range(digits)])
def fizzbuzz_encode(i):
    if i%15 == 0:
        return np.array([0, 0, 0, 1])
    elif i%5 == 0:
        return np.array([0, 0, 1, 0])
    elif i%3 == 0:
        return np.array([0, 1, 0, 0])
    else:
        return np.array([1, 0, 0, 0])

def fizzbuzz_decode(i, pred):
    if pred == 0:
        return i
    elif pred == 1:
        return 'fizz'
    elif pred == 2:
        return 'buzz'
    else:
        return 'fizzbuzz'

num_digits = 10         #number of digits in the input

trX = np.array([binary_encode(i, num_digits) for i in range(101, 2**num_digits)])
trY = np.array([fizzbuzz_encode(i) for i in range(101, 2**num_digits)])
tr_dataset = gluon.data.dataset.ArrayDataset(trX, trY)          #training dataset

testX = np.array([binary_encode(i, num_digits) for i in range(1, 101)])
testY = np.array([fizzbuzz_encode(i) for i in range(1, 101)])
test_dataset = gluon.data.dataset.ArrayDataset(testX, testY)    #testing dataset

hidden_layers = 1       #number of hidden layers
hidden_units = 100      #number of nodes in a hidden layer
batch_size = 32

train_data = gluon.data.DataLoader(tr_dataset, batch_size, shuffle=False)
test_data = gluon.data.DataLoader(test_dataset, batch_size, shuffle=False)

net = gluon.nn.Sequential()                                         #making the neural net
with net.name_scope():
    net.add(gluon.nn.Dense(hidden_units, activation='relu'))        #hidden layer
    net.add(gluon.nn.Dense(4))                                      #output layer

net.collect_params().initialize(mx.init.Normal(sigma=0.01), ctx=ctx)                #setting the initial weights and biases
softmax_cross_entropy = gluon.loss.SoftmaxCrossEntropyLoss(sparse_label=False)      #loss function (Softmax Cross Entropy)
trainer = gluon.Trainer(net.collect_params(), 'sgd', {'learning_rate':0.05})        #setting up the optimizer (Stochastic Gradient Descent)

epochs = 1000

for e in range(epochs):                             #training procedure
    cumulative_loss = 0
    for i, (data, label) in enumerate(train_data):
        data = data.as_in_context(ctx)
        data = data.astype(np.float32)
        label = label.as_in_context(ctx)
        label = label.astype(np.float32)
        with autograd.record():
            output = net(data)
            loss = softmax_cross_entropy(output, label)
        loss.backward()
        trainer.step(data.shape[0])
        cumulative_loss += nd.sum(loss).asscalar()

prediction = []
correct = []

for i, (data, label) in enumerate(test_data):
    data = data.as_in_context(ctx)
    data = data.astype(np.float32)
    for j in net(data):
        prediction.append(fizzbuzz_decode(len(prediction)+1, nd.argmax(j, axis=0)))     #prediction array

for i in prediction:
    print(i)                                    #prints the final output

for i, val in enumerate(testY):
    correct.append(fizzbuzz_decode(i+1, np.argmax(val, axis=0)))

accuracy = 0

for i in range(100):
    if prediction[i]==correct[i]:
        accuracy+=1

print('\nThe acuuracy of the training data is ' + str(accuracy) + '%')

What exactly is your question? – PhilMasteG Jun 17 '18 at 15:48 — PhilMasteG, Jun 17 '18 at 15:48
I have edited the question @PhilMasterG – Abhilash Mukherjee Jun 17 '18 at 15:54 — Abhilash Mukherjee, Jun 17 '18 at 15:54

score 0 · Answer 1 · answered Jun 18 '18 at 20:53

I played with your code a little, and I guess you have forgotten to replace test_data or testY in your loops to train_data and trY respectively in the following code blocks:

for i, (data, label) in enumerate(test_data):

and

for i, val in enumerate(testY):

If I do so, I get training accuracy 99% and test accuracy 97%.

Training data worse than testing data

1 Answers1