I'm tasked with writing an ANN using only NumPy (no TensorFlow, PyTorch, etc.) on the iris dataset. I'm running 2000 epochs, and by around epoch 40 the accuracy of the network gets stuck at 0.66. Also, while debugging, the parameters are either extremely high or extremely low (for example, for self.layers[0], the self.output parameter is [-59.2447737, -79.13719157, -57.27055739, 117.26796309, 127.71775426] on epoch 400).
My network has 4 input nodes, a single hidden layer with 5 nodes and an output layer with 3 nodes corresponding to the 3 types of irises.
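To double-check that the dimensions line up, here's a minimal standalone sketch of that forward pass with the same layer sizes (the variable names here are just for illustration, not from my actual code):

import numpy as np

x = np.random.randn(1, 4)                                # one scaled iris sample (4 features)
W1, b1 = np.random.randn(4, 5) * 0.01, np.zeros((1, 5))  # hidden layer parameters
W2, b2 = np.random.randn(5, 3) * 0.01, np.zeros((1, 3))  # output layer parameters

h = 1 / (1 + np.exp(-(x @ W1 + b1)))                     # sigmoid hidden activations, shape (1, 5)
o = 1 / (1 + np.exp(-(h @ W2 + b2)))                     # sigmoid output activations, shape (1, 3)
print(h.shape, o.shape)                                  # (1, 5) (1, 3)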
I'm confused as to why the network gets stuck and the values blow up like this. The learning rate is low (0.01), the weight and bias vectors are initialized with small values, and I normalized the input data.
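For reference, here's my understanding of what a single layer should compute during backprop with a sigmoid activation, where a = sigmoid(z) is the layer's activated output (the arrays below are made-up example values, just to show the shapes; please correct me if the math itself is off):

import numpy as np

a   = np.array([[0.7, 0.2, 0.9]])              # example activated output, shape (1, 3)
err = np.array([[0.1, -0.3, 0.2]])             # example error arriving from the layer above, shape (1, 3)
x   = np.array([[0.5, -1.0, 0.3, 0.8, 0.1]])   # example layer input, shape (1, 5)

delta     = err * a * (1 - a)                  # error scaled by the sigmoid derivative a * (1 - a)
d_weights = x.T @ delta                        # shape (5, 3), same as the weights
d_biases  = delta                              # shape (1, 3), same as the biases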
Any help with this would be highly appreciated. My code:
main.py:
import numpy as np
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from network import NeuralNetwork
from layer import Layer
if __name__ == "__main__":
    iris = load_iris()
    data, target, target_names = iris.data, iris.target, iris.target_names
    scaler = StandardScaler()

    # One-hot encode the target array to match the 3-neuron output structure
    one_hot_targets = []
    for i in range(len(target)):
        vec = np.zeros(len(target_names))
        vec[target[i]] = 1
        one_hot_targets.append(vec)
    one_hot_targets = np.array(one_hot_targets)

    X_train, X_test, Y_train, Y_test = train_test_split(data, one_hot_targets, test_size=0.33, shuffle=True)
    scaler.fit(X_train)
    X_train_scaled = scaler.transform(X_train)
    X_test_scaled = scaler.transform(X_test)

    learning_rate = 0.01

    # Init a network and add its layers. The input layer is represented by the input itself, not by an actual Layer object
    network = NeuralNetwork(learning_rate)
    network.add_layer(Layer(4, 5))  # hidden layer 1
    network.add_layer(Layer(5, 3))  # output layer

    # Train the network for a number of epochs
    network.train(X_train_scaled, Y_train, epochs=2000)

    # Test on the test data separated earlier
    output, accuracy = network.test(X_test_scaled, Y_test)

    # Print testing output
    for i in range(len(output)):
        prediction = target_names[np.argmax(output[i])]
        answer = target_names[np.argmax(Y_test[i])]
        print(f"For testing row: {X_test[i]}, the prediction was {prediction} and the answer was {answer}")
    print(f"Network test accuracy: {accuracy:.4f}")
network.py:
import numpy as np
from utils import calc_error
np.random.seed(10)
class NeuralNetwork:
    def __init__(self, learning_rate=0.1):
        self.layers = []
        self.learning_rate = learning_rate

    def add_layer(self, layer):
        # Layers must be added in order
        self.layers.append(layer)

    def forward_propagate(self, input):
        output = input
        for layer in self.layers:
            output = layer.forward_propagate(output)
        return output

    def back_propagate(self, error):
        for layer in reversed(self.layers):
            error = layer.back_propagate(error)

    def train_iteration(self, input, target):
        output = self.forward_propagate(input)
        # Calculate the error between the output and the target value
        error = output - target
        # Backpropagate the error through the network
        self.back_propagate(error)
        # Update the weights and biases of the layers
        for layer in self.layers:
            layer.weights -= self.learning_rate * layer.d_weights
            layer.biases -= self.learning_rate * layer.d_biases

    def train_epoch(self, inputs, targets):
        for i in range(len(inputs)):
            x = inputs[i]
            y = targets[i]
            self.train_iteration(x, y)

    def train(self, inputs, targets, epochs=4000):
        for epoch in range(epochs):
            self.train_epoch(inputs, targets)
            # Log training accuracy every 1% of the total epochs
            if epoch % (epochs / 100) == 0:
                _, accuracy = self.test(inputs, targets)
                print(f"Epoch {epoch} --> Training Accuracy: {accuracy}")

    def predict(self, input):
        output = self.forward_propagate(input)
        return output

    def test(self, inputs, targets):
        output, correct = [], 0
        for i in range(len(inputs)):
            x, y = inputs[i], targets[i]
            guess = self.predict(x)
            is_correct = y[guess.argmax()] == 1  # correct if the one-hot target is 1 at the argmax
            correct += is_correct
            output.append(guess)
        return output, (correct / len(inputs))
layer.py:
import numpy as np
from utils import sigmoid, deriv_sigmoid
np.random.seed(10)
class Layer:
    def __init__(self, num_inputs, num_neurons, activation_function=sigmoid, derivative_activation_function=deriv_sigmoid):
        self.weights = np.random.randn(num_inputs, num_neurons) * 0.01
        self.biases = np.zeros((1, num_neurons))
        self.activation_function = activation_function
        self.derivative_activation_function = derivative_activation_function

    def forward_propagate(self, input):
        self.input = input
        self.output = np.dot(input, self.weights) + self.biases
        self.activated_output = self.activation_function(self.output)
        return self.activated_output

    def back_propagate(self, error):
        error = self.derivative_activation_function(error)
        reshaped_input = self.input.T.reshape((np.max(self.input.shape), 1))  # ensures the dot product always works
        self.d_weights = np.dot(reshaped_input, error)
        self.d_biases = np.sum(error, axis=0, keepdims=True)
        self.d_input = np.dot(error, self.weights.T)
        return self.d_input
utils.py:
import numpy as np
def sigmoid(x):
    # Logistic function: 1 / (1 + e^(-x))
    return 1 / (1 + np.exp(-x))

def deriv_sigmoid(x):
    # Sigmoid derivative written in terms of the sigmoid's output:
    # if x = sigmoid(z), then dsigmoid/dz = x * (1 - x)
    return np.multiply(x, 1 - x)
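As an extra sanity check (a minimal standalone sketch, not part of my training code), I compared deriv_sigmoid against a numerical derivative, assuming it is meant to receive the already-activated output sigmoid(z):

import numpy as np
from utils import sigmoid, deriv_sigmoid

z = np.linspace(-3.0, 3.0, 7)
a = sigmoid(z)
analytic = deriv_sigmoid(a)                                # a * (1 - a)
numeric = (sigmoid(z + 1e-6) - sigmoid(z - 1e-6)) / 2e-6   # central difference
print(np.max(np.abs(analytic - numeric)))                  # should agree to within ~1e-10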