I can't figure out what I'm doing wrong with this XOR neural network. Maybe I'm not computing the loss correctly? The loss decreases slightly at the start of training, but the accuracy gets stuck at 50% almost immediately and never improves. Could someone please point out what I'm doing wrong?
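For reference, the loss I intend to compute is plain MSE over the four training examples. Written out in numpy just to show what I mean (this is not part of the script below):

import numpy as np

y_true = np.array([0., 1., 1., 0.])      # XOR targets
y_pred = np.array([0.5, 0.5, 0.5, 0.5])  # e.g. a network that always outputs 0.5
mse = np.mean((y_true - y_pred) ** 2)    # elementwise squared error, then mean
print(mse)                               # 0.25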
Here's a minimal, self-contained example:
import numpy as np
import tensorflow as tf
n_inputs = 2
n_hidden = 3
n_outputs = 1
X = tf.placeholder(tf.float32, shape=(None, n_inputs), name='X')
y = tf.placeholder(tf.float32, shape=(None), name='y')
def neuron_layer(X, n_neurons, name, activation=None):
    with tf.name_scope(name):
        n_inputs = int(X.get_shape()[1])
        stddev = 2 / np.sqrt(n_inputs)  # scale the weight init by fan-in
        init = tf.truncated_normal((n_inputs, n_neurons), stddev=stddev)
        W = tf.Variable(init, name="weights")
        b = tf.Variable(tf.zeros([n_neurons]), name="bias")
        Z = tf.matmul(X, W) + b
        if activation is not None:
            return activation(Z)
        else:
            return Z
with tf.name_scope('nn'):
    hidden = neuron_layer(X, n_hidden, name='hidden', activation=tf.nn.sigmoid)
    prediction_probabilities = neuron_layer(hidden, n_outputs, name='outputs', activation=tf.nn.sigmoid)
with tf.name_scope('loss'):
    mse_loss = tf.reduce_mean(tf.squared_difference(y, prediction_probabilities), name='loss')
learning_rate = 0.1
with tf.name_scope('train'):
    optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
    training_op = optimizer.minimize(mse_loss)
with tf.name_scope('eval'):
    correct = tf.equal(tf.greater_equal(prediction_probabilities, 0.5), tf.cast(y, tf.bool))
    accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))
init = tf.global_variables_initializer()
X_train = [
    (0, 0),
    (0, 1),
    (1, 0),
    (1, 1)
]
y_train = [0, 1, 1, 0]  # XOR truth table
with tf.Session() as sess:
    init.run()
    for epoch in range(500):
        _, mse, acc = sess.run([training_op, mse_loss, accuracy],
                               feed_dict={X: np.array(X_train), y: np.array(y_train)})
        print("mse: %.4f, accuracy: %.2f" % (mse, acc))