I'm getting started with neural networks. I understand how a perceptron works and the logic behind the feed-forward and backpropagation mechanisms, and I am now trying to write a simple multi-layer network with 3 neurons (2 in a hidden layer and 1 as output), which should be enough to perform a XOR operation.
I implemented it in Python (v3.6.1) this way:
import numpy as np

class Neuron():
    def __init__(self, n, w = None, b = None):
        #np.random.seed(46144492)
        #np.random.seed(23)
        # Random weights and bias in [-1, 1) unless explicit values are given
        self.weights = 2 * np.random.random(n) - 1 if w is None else np.array(w)
        self.bias = 2 * np.random.random() - 1 if b is None else b
        self.learning_rate = 0.1
        self.weights_error = []
        for i in range(n):
            self.weights_error.append(0)
        self.bias_error = 0

    def learning_factor(self, output):
        # Derivative of the sigmoid, written in terms of the neuron's output
        return output * (1 - output)

    def fire(self, x):
        # Sigmoid activation
        return 1 / (1 + np.exp(-x))

    def __call__(self, inputs):
        # Weighted sum of the inputs plus the bias, passed through the activation
        weighted = []
        for i in range(len(inputs)):
            weighted.append(inputs[i] * self.weights[i])
        weighted = np.array(weighted)
        return self.fire(weighted.sum() + self.bias)

    def adjust(self, n_weights):
        # Apply the corrections computed by calc_error
        for i in range(n_weights):
            self.weights[i] -= self.weights_error[i]
        self.bias -= self.bias_error
class HiddenNeuron(Neuron):
    def calc_error(self, inputs, output, next_layer, number_in_layer):
        # Sum the deltas of the next layer, weighted by the connections
        # leaving this neuron (number_in_layer is 1-based)
        error = 0
        for n in range(len(next_layer)):
            error += next_layer[n].delta * next_layer[n].weights[number_in_layer - 1]
        derivative = self.learning_factor(output)
        self.delta = error * derivative
        self.weights_error = []
        for i in range(len(inputs)):
            self.weights_error.append(self.delta * inputs[i] * self.learning_rate)
        self.bias_error = self.delta * self.learning_rate
class OutputNeuron(Neuron):
    def calc_error(self, inputs, output, expected):
        # Output error is the difference between actual and expected output
        error = output - expected
        derivative = self.learning_factor(output)
        self.delta = error * derivative
        self.weights_error = []
        for i in range(len(inputs)):
            self.weights_error.append(self.delta * inputs[i] * self.learning_rate)
        self.bias_error = self.delta * self.learning_rate
# Network
n1, n2 = HiddenNeuron(2), HiddenNeuron(2)
n3 = OutputNeuron(2)
# Training data
training_set_in = [[0, 0], [0, 1], [1, 0], [1, 1]]
training_set_out = [0, 1, 1, 0]
# Training cycles
for epoch in range(10000):
    for i in range(len(training_set_in)):
        # Feed-forward
        n1_out = n1(training_set_in[i])
        n2_out = n2(training_set_in[i])
        n3_in = [n1_out, n2_out]
        n3_out = n3(n3_in)
        # Backpropagation
        n3.calc_error(n3_in, n3_out, training_set_out[i])
        n2.calc_error(training_set_in[i], n2_out, [n3], 2)
        n1.calc_error(training_set_in[i], n1_out, [n3], 1)
        n1.adjust(2)
        n2.adjust(2)
        n3.adjust(2)
# "New" cases (test)
for new in [[0, 0], [0, 1], [1, 0], [1, 1]]:
print(n3([n1(new), n2(new)]))
As you can see, I use the sigmoid as the activation function, and I haven't implemented momentum yet (I still have to understand how it works).
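For reference, my current understanding of momentum is that each weight update keeps a fraction of the previous step, roughly like the sketch below. This is not part of the code above; adjust_with_momentum, self.velocity, self.bias_velocity and the 0.9 factor are just placeholders I made up to illustrate the idea.

# Sketch only: how I think a momentum term could fit into the Neuron class.
# self.velocity and self.bias_velocity would be initialised to zeros in
# __init__ (like weights_error); 0.9 is just an example value, not tuned.
def adjust_with_momentum(self, n_weights, momentum = 0.9):
    for i in range(n_weights):
        # Mix the previous step with the current correction
        self.velocity[i] = momentum * self.velocity[i] - self.weights_error[i]
        self.weights[i] += self.velocity[i]
    self.bias_velocity = momentum * self.bias_velocity - self.bias_error
    self.bias += self.bias_velocity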
The network works in most cases. But I found some cases in which it outputs strange values (see, for example, the two random seeds I commented out in the Neuron class constructor). These two cases are solved if I increase Neuron.learning_rate (for example, set it to 10), but other exceptions still come up (I couldn't find seeds for these, I'm sorry... but they're quite frequent; just run the code 10 or 20 times and you'll see some).
The question is: why is this happening? Is my network too "small"/simple? I thought 3 neurons would be enough. Or is it just a problem of "calibration" (I don't know what this is called; I mean the process of tuning the learning rate and the momentum, which is absent here)? Or did I make a mistake somewhere? I can't really figure it out.
EDIT: I'm training the net on all possible cases and then testing it with the same cases, just to verify that it works correctly.
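To be explicit, by "verify" I mean a check along these lines (an illustration only, reusing the objects defined above and rounding the output to compare it with the target):

# Compare the rounded network output with the expected value for each case
for case, expected in zip(training_set_in, training_set_out):
    prediction = n3([n1(case), n2(case)])
    print(case, expected, round(prediction))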