
I am trying to code a neural network from scratch, i.e. using only numpy and pandas, and train it on the iris dataset. Here is my code:

import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder, OneHotEncoder

data = pd.read_csv('/Users/MyName/Downloads/iris.data')

X_train = data.iloc[:75, :4]  # first 75 rows, four feature columns
y_train = data.iloc[:75, 4]   # labels for the first 75 rows
X_test = data.iloc[75:, :4]   # remaining rows held out for testing

le = LabelEncoder()
y_train = le.fit_transform(y_train)

class NeuralNet():
    def __init__(self, i_dim, h_dim, o_dim, lr):
        self.i_dim = i_dim
        self.h_dim = h_dim
        self.o_dim = o_dim
        self.lr = lr

        self.weights1 = np.random.randn(self.i_dim, self.h_dim) / np.sqrt(self.i_dim)
        self.bias1 = np.zeros((1, self.h_dim))
        self.weights2 = np.random.randn(self.h_dim, self.o_dim) / np.sqrt(self.h_dim)
        self.bias2 = np.zeros((1, self.o_dim))

    def sigmoid(self, x):
        return 1 / (1 + np.exp(-x))

    def softmax(self, x):
        exps = np.exp(x - np.max(x, axis=1, keepdims=True))
        return exps / np.sum(exps, axis=1, keepdims=True)

    def forward(self, X):
        self.layer1 = self.sigmoid(np.dot(X, self.weights1) + self.bias1)
        self.layer2 = self.softmax(np.dot(self.layer1, self.weights2) + self.bias2)
        return self.layer2

    def sigmoid_derivative(self, x):
        return x * (1 - x)

    def softmax_derivative(self, x):
        s = x.reshape(-1, 1)
        return np.diagflat(s) - np.dot(s, s.T)

    def cross_ent_loss(self, y, y_hat):
        # one-hot encode the integer labels
        y_reshaped = np.zeros((y.size, y.max() + 1))
        y_reshaped[np.arange(y.size), y] = 1
        sample_losses = - y_reshaped * np.log(y_hat)
        loss = np.mean(sample_losses)
        return loss

    def backward(self, X, y, y_hat):
        y_hat = self.forward(X)
        loss = self.cross_ent_loss(y, y_hat)

        d_softmax = self.softmax_derivative(y_hat)
        d_sigmoid = self.sigmoid_derivative(self.layer1)

        d_weights2 = np.dot(self.layer1.T, (2 * (y - y_hat)[:, np.newaxis, :] * d_softmax))
        d_bias2 = np.sum(2 * (y - y_hat)[:, np.newaxis, :] * d_softmax, axis=0, keepdims=True)
        d_weights1 = np.dot(X.T, (np.dot(2 * (y - y_hat)[:, np.newaxis, :] * d_softmax, self.weights2.T) * d_sigmoid))
        d_bias1 = np.sum(np.dot(2 * (y - y_hat)[:, np.newaxis, :] * d_softmax, self.weights2.T) * d_sigmoid, axis=0)

        self.weights1 -= self.lr * d_weights1
        self.bias1 -= self.lr * d_bias1
        self.weights2 -= self.lr * d_weights2
        self.bias2 -= self.lr * d_bias2

    def train(self, X, y, epochs):
        for epoch in range(epochs):
            y_hat = self.forward(X)
            self.backward(X, y, y_hat)
            loss = self.cross_ent_loss(y, y_hat)
            print(f"Epoch {epoch + 1}: loss = {loss:.4f}")

    def predict(self, X):
        return self.forward(X)

nn = NeuralNet(4, 5, 3, 0.1)
nn.train(X_train, y_train, epochs=1000)

y_pred = nn.predict(X_train)

y_pred_labels = np.argmax(y_pred, axis=1)


I'm getting the error "ValueError: operands could not be broadcast together with shapes (75,2) (75,3)". It seems to be happening in the definition of the cross-entropy loss, in particular on the line "sample_losses = - y_reshaped * np.log(y_hat)".
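
For reference, adding a quick shape check just above that line (something like the version below) shows the two operands, and they match the shapes in the error message:

    def cross_ent_loss(self, y, y_hat):
        y_reshaped = np.zeros((y.size, y.max() + 1))
        y_reshaped[np.arange(y.size), y] = 1
        print(y_reshaped.shape, y_hat.shape)  # prints (75, 2) (75, 3) right before the failure
        sample_losses = - y_reshaped * np.log(y_hat)
        loss = np.mean(sample_losses)
        return loss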

I've tried reshaping y in different ways but I keep getting the same error.
