I am trying to code a neural network from scratch, i.e. using only numpy and pandas (sklearn is only used to label-encode the targets), and I want to train it on the iris dataset. Here is my code:
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder, OneHotEncoder
data = pd.read_csv('/Users/MyName/Downloads/iris.data')
X_train = data.iloc[:75, :4]
y_train = data.iloc[:75, 4]
X_test = data.iloc[75:, :4]
le = LabelEncoder()
y_train = le.fit_transform(y_train)
class NeuralNet():
    def __init__(self, i_dim, h_dim, o_dim, lr):
        self.i_dim = i_dim
        self.h_dim = h_dim
        self.o_dim = o_dim
        self.lr = lr
        # weights scaled by 1/sqrt(fan-in), biases initialised to zero
        self.weights1 = np.random.randn(self.i_dim, self.h_dim) / np.sqrt(self.i_dim)
        self.bias1 = np.zeros((1, self.h_dim))
        self.weights2 = np.random.randn(self.h_dim, self.o_dim) / np.sqrt(self.h_dim)
        self.bias2 = np.zeros((1, self.o_dim))

    def sigmoid(self, x):
        return 1 / (1 + np.exp(-x))

    def softmax(self, x):
        # numerically stable softmax: subtract the row-wise max before exponentiating
        exps = np.exp(x - np.max(x, axis=1, keepdims=True))
        return exps / np.sum(exps, axis=1, keepdims=True)

    def forward(self, X):
        self.layer1 = self.sigmoid(np.dot(X, self.weights1) + self.bias1)
        self.layer2 = self.softmax(np.dot(self.layer1, self.weights2) + self.bias2)
        return self.layer2

    def sigmoid_derivative(self, x):
        # x is expected to already be the sigmoid output
        return x * (1 - x)

    def softmax_derivative(self, x):
        s = x.reshape(-1, 1)
        return np.diagflat(s) - np.dot(s, s.T)

    def cross_ent_loss(self, y, y_hat):
        # one-hot encode the integer labels, then average the per-sample losses
        y_reshaped = np.zeros((y.size, y.max() + 1))
        y_reshaped[np.arange(y.size), y] = 1
        sample_losses = - y_reshaped * np.log(y_hat)
        loss = np.mean(sample_losses)
        return loss

    def backward(self, X, y, y_hat):
        y_hat = self.forward(X)
        loss = self.cross_ent_loss(y, y_hat)
        d_softmax = self.softmax_derivative(y_hat)
        d_sigmoid = self.sigmoid_derivative(self.layer1)
        d_weights2 = np.dot(self.layer1.T, (2 * (y - y_hat)[:, np.newaxis, :] * d_softmax))
        d_bias2 = np.sum(2 * (y - y_hat)[:, np.newaxis, :] * d_softmax, axis=0, keepdims=True)
        d_weights1 = np.dot(X.T, (np.dot(2 * (y - y_hat)[:, np.newaxis, :] * d_softmax, self.weights2.T) * d_sigmoid))
        d_bias1 = np.sum(np.dot(2 * (y - y_hat)[:, np.newaxis, :] * d_softmax, self.weights2.T) * d_sigmoid, axis=0)
        self.weights1 -= self.lr * d_weights1
        self.bias1 -= self.lr * d_bias1
        self.weights2 -= self.lr * d_weights2
        self.bias2 -= self.lr * d_bias2

    def train(self, X, y, epochs):
        for epoch in range(epochs):
            y_hat = self.forward(X)
            self.backward(X, y, y_hat)
            loss = self.cross_ent_loss(y, y_hat)
            print(f"Epoch {epoch + 1}: loss = {loss:.4f}")

    def predict(self, X):
        return self.forward(X)
nn = NeuralNet(4, 5, 3, 0.1)
nn.train(X_train, y_train, epochs=1000)
y_pred = nn.predict(X_train)
y_pred_labels = np.argmax(y_pred, axis=1)
I'm getting the error "ValueError: operands could not be broadcast together with shapes (75,2) (75,3)", and it seems to come from the cross-entropy loss, specifically the line "sample_losses = - y_reshaped * np.log(y_hat)".
I've tried reshaping y in different ways but keep getting the same error.
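To make the shapes easier to inspect, here is a small standalone check that rebuilds the two arrays that cross_ent_loss multiplies together (the _dbg names exist only in this snippet; it has to run right after constructing nn and before nn.train, since the crash happens during training):

y_hat_dbg = nn.forward(X_train)                      # the network output that cross_ent_loss receives as y_hat
y_onehot_dbg = np.zeros((y_train.size, y_train.max() + 1))
y_onehot_dbg[np.arange(y_train.size), y_train] = 1   # same one-hot construction as inside cross_ent_loss
print(y_onehot_dbg.shape, y_hat_dbg.shape)           # these are the two shapes reported in the error message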