4

I can't figure out what I'm doing wrong with this XOR neural network. Maybe I'm not computing the loss correctly? The loss improves slightly at the beginning, and then the accuracy converges to 50% very quickly. Could someone please point out what I'm doing wrong?

Here's a minimal, self-contained example:

import numpy as np
import tensorflow as tf

# Layer widths: input features, hidden units, output units.
n_inputs, n_hidden, n_outputs = 2, 3, 1

# Feature batch: any number of rows, n_inputs columns.
X = tf.placeholder(tf.float32, shape=[None, n_inputs], name="X")
# Label batch. No shape is specified here, so the placeholder accepts a feed
# of any shape or rank; nothing forces it to line up with the network output.
y = tf.placeholder(tf.float32, shape=None, name="y")

def neuron_layer(X, n_neurons, name, activation=None):
    """Build one fully connected layer on top of ``X``.

    A weight matrix and a bias vector are created inside the name scope
    ``name``; the layer computes ``matmul(X, W) + b`` and, when an
    activation is supplied, applies it to that result.

    Args:
        X: Input tensor. Its size along axis 1 must be statically known,
            because it determines the number of rows of the weight matrix.
        n_neurons: Number of units in the layer (columns of the weight
            matrix and length of the bias vector).
        name: Name scope under which the layer's ops are created.
        activation: Optional callable applied to the pre-activations.
            ``None`` returns the pre-activations unchanged.

    Returns:
        The layer's output tensor.
    """
    with tf.name_scope(name):
        fan_in = int(X.get_shape()[1])
        # Truncated-normal initial weights, standard deviation 2 / sqrt(fan_in).
        initial_weights = tf.truncated_normal(
            (fan_in, n_neurons), stddev=2 / np.sqrt(fan_in)
        )
        W = tf.Variable(initial_weights, name="weights")
        b = tf.Variable(tf.zeros([n_neurons]), name="bias")
        pre_activation = tf.matmul(X, W) + b
        if activation is None:
            return pre_activation
        return activation(pre_activation)

# Network: inputs -> sigmoid hidden layer -> single sigmoid output unit.
with tf.name_scope("nn"):
    hidden = neuron_layer(X, n_hidden, name="hidden", activation=tf.nn.sigmoid)
    prediction_probabilities = neuron_layer(
        hidden, n_outputs, name="outputs", activation=tf.nn.sigmoid
    )

# NOTE(review): prediction_probabilities has shape (batch, 1), while y is fed
# below as a rank-1 array of shape (4,). The element-wise ops in the "loss"
# and "eval" scopes broadcast that pair to (4, 4), so every label is compared
# with every prediction instead of only its own row. Feeding the labels as a
# column, e.g. [[0], [1], [1], [0]], keeps labels and predictions aligned.
with tf.name_scope("loss"):
    squared_errors = tf.squared_difference(y, prediction_probabilities)
    mse_loss = tf.reduce_mean(squared_errors, name="loss")

learning_rate = 0.1

with tf.name_scope("train"):
    optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
    training_op = optimizer.minimize(mse_loss)

with tf.name_scope("eval"):
    # A prediction counts as positive when its probability is at least 0.5.
    predicted_positive = tf.greater_equal(prediction_probabilities, 0.5)
    label_positive = tf.cast(y, tf.bool)
    correct = tf.equal(predicted_positive, label_positive)
    accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))

init = tf.global_variables_initializer()

# The four XOR input pairs and their labels, in matching order.
X_train = [(0, 0), (0, 1), (1, 0), (1, 1)]
y_train = [0, 1, 1, 0]

with tf.Session() as sess:
    sess.run(init)
    # The same full batch is fed on every step, so build the feed once.
    feed = {X: np.array(X_train), y: np.array(y_train)}
    for epoch in range(500):
        _, mse, acc = sess.run([training_op, mse_loss, accuracy], feed_dict=feed)
        print("mse: %.4f, accuracy: %.2f" % (mse, acc))
Austin
  • 6,921
  • 12
  • 73
  • 138

1 Answer

3

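Your model code is fine; the problem is the shape of your labels. You need to pass `y_train` as a 2D list, not a 1D one. TensorFlow treats each inner list as one sample, so the labels get shape (4, 1) and match the network's output. With a 1D list of shape (4,), the labels are broadcast against the (4, 1) predictions into a (4, 4) tensor, so the loss and accuracy compare every label with every prediction instead of only the matching one.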

y_train = [[0],[1],[1],[0]]

Your code works nicely after that change.

...
mse: 0.0002, accuracy: 1.00
cs95
  • 379,657
  • 97
  • 704
  • 746
  • Ah!! thank you! It would have taken me forever to realize that since I actually had it that way previously and mistakenly changed it due to a previous error. – Austin Aug 13 '18 at 01:46