
Task: Classify whether an image is of a human or not.

Train set: 200 images, labeled (train_y) 0 = not human, 1 = human.
Test set: 49 images, labeled (test_y) 0 = not human, 1 = human.
Both sets are 64*64 RGB images.
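
The shapes I expect after loading (an assumption based on the description above):

# train_x: (200, 64, 64, 3)   train_y: (200,)
# test_x:  (49, 64, 64, 3)    test_y:  (49,)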

First, to explain what I intended:

  1. Convert the image shape from (64, 64, 3) to (1, 64x64x3) (see the reshape sketch after this list).
  2. 2 hidden layers.
  3. 1 output layer.
  4. Feed-forward pass with ReLU activation; after the output, back-propagation begins.
  5. If the loss value doesn't drop for 30 epochs, stop early.
  6. Test.
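
A minimal sketch of step 1, assuming train_x is loaded with shape (200, 64, 64, 3) as described above:

# Flatten each 64x64x3 image into one row and scale pixel values to [0, 1]
flat_x = train_x.reshape(train_x.shape[0], -1) / 255  # resulting shape: (200, 12288)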

Accuracy never gets higher than 60%. Almost every time the model predicts all test labels as 0, which suggests something goes wrong during training. My guesses: a) an overfitting problem, or b) the composition of the test set (I put it together myself, since I only had the training-set images).
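
To check guess b), this is the kind of quick look I can take at the test labels (assuming test_y holds 0/1 integer labels as described above). With all-zero predictions, accuracy is simply the fraction of 0 labels in the test set:

# Count how many 0 (not human) and 1 (human) labels the test set contains
print(np.bincount(test_y.astype(int).ravel()))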

After fixing the problem, if possible I'm also curious how to save the result data as I change parameters (e.g. activation function, number of layers, number of nodes) and how to plot the loss value and prediction accuracy.
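
Roughly what I have in mind is the sketch below. It assumes NeuralNet (defined further down) is changed to take the varied parameter, e.g. the hidden-layer size, as a constructor argument, which it currently doesn't:

results = []
train_y_onehot = np.eye(int(np.max(train_y) + 1))[train_y.astype(int)]  # one-hot labels
for n in [64, 128, 256]:  # hypothetical hidden-layer sizes to try
    m = NeuralNet(train_x / 255, train_y_onehot)  # imagine n being passed in here
    m.train(epochs=500, early_stopping_rounds=30)
    acc = np.mean(m.predict(test_x / 255) == test_y)
    results.append({'neurons': n, 'losses': m.losses, 'accuracy': acc})

# Loss curve of the last run, plus accuracy per setting
plt.plot(results[-1]['losses'])
plt.xlabel('epoch')
plt.ylabel('loss')
plt.show()
print([(r['neurons'], r['accuracy']) for r in results])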

I'm working in a Jupyter notebook!

# Load train data

import h5py
import os
from PIL import Image
import matplotlib.pyplot as plt
import numpy as np

filename = './train.hdf5'

with h5py.File(filename, 'r') as f:
    for key in f.keys():
        print('Dataset name: ', key)
        
    train_x = f['train_x'][:]
    train_y = f['train_y'][:]

print(train_x[0][11])
print(len(train_x))
print(train_y)
print(len(train_y))


# Load test data

filename = './test.hdf5'

with h5py.File(filename, 'r') as f:
    for key in f.keys():
        print('Dataset name: ', key)
        
    test_x = f['test_x'][:]
    test_y = f['test_y'][:]

print(test_x)
print(len(test_x))
print(test_y)
print(len(test_y))

# Just to see whether images are properly loaded.
for i in range(30):
    img = train_x[i]
    
    if img.shape[-1] == 1:
        img = img.squeeze(axis=-1)
        
    plt.imshow(img, cmap='gray')
    plt.show()


# MY NEURAL NETWORK!

# ReLU activation function
def relu(X):
    return np.maximum(0,X)

# Derivative of ReLU function
def relu_derivative(X):
    return np.where(X<=0, 0, 1)

def sigmoid(x):
    return 1 / (1 + np.exp(-x))

def tanh(x):
    return np.tanh(x)

# Softmax function
def softmax(X):
    exps = np.exp(X - np.max(X, axis=1, keepdims=True))
    return exps/np.sum(exps, axis=1, keepdims=True)

#Cross entropy loss
def cross_entropy(y_pred, y_true):
    # Clip values to prevent log(0)
    y_pred = np.clip(y_pred, 1e-9, 1 - 1e-9)
    n_samples = y_true.shape[0]
    logp = - np.log(y_pred[np.arange(n_samples), y_true.argmax(axis=1)])
    loss = np.sum(logp)/n_samples
    return loss

# Duplicate of cross_entropy above (only the clipping bound differs); this is the one backprop() actually calls.
def error(pred, real):
    n_samples = real.shape[0]
    pred = np.clip(pred, 1e-7, 1 - 1e-7)  # Add this line
    logp = - np.log(pred[np.arange(n_samples), real.argmax(axis=1)])
    loss = np.sum(logp)/n_samples
    return loss

class NeuralNet:
    def __init__(self, x, y):
        self.x = x.reshape(x.shape[0], -1)  # flatten each image: (200, 64*64*3)
        self.y = y # (200, 2)
        neurons = 128 # neurons for hidden layers
        self.lr = 0.01 # learning rate
        ip_dim = self.x.shape[1] # input layer size
        op_dim = self.y.shape[1] # output layer size

        self.w1 = np.random.randn(ip_dim, neurons)  # weights drawn from a standard normal distribution
        print(self.w1)  # debug print: this is the large matrix shown in the output below
        self.b1 = np.zeros((1, neurons))
        self.w2 = np.random.randn(neurons, neurons)
        self.b2 = np.zeros((1, neurons))
        self.w3 = np.random.randn(neurons, op_dim)
        self.b3 = np.zeros((1, op_dim))
        

    def feedforward(self):
        z1 = np.dot(self.x, self.w1) + self.b1
        self.a1 = relu(z1)
        z2 = np.dot(self.a1, self.w2) + self.b2
        self.a2 = relu(z2)
        z3 = np.dot(self.a2, self.w3) + self.b3 
        self.a3 = softmax(z3)
        
    def backprop(self):
        loss = error(self.a3, self.y)
        print(f'Loss #{self.epoch} :', loss)
        self.loss = loss 
        
        a3_delta = self.a3 - self.y # w3
        z2_delta = np.dot(a3_delta, self.w3.T)
        a2_delta = z2_delta * relu_derivative(self.a2) # w2
        z1_delta = np.dot(a2_delta, self.w2.T)
        a1_delta = z1_delta * relu_derivative(self.a1) # w1

        self.w3 -= self.lr * np.dot(self.a2.T, a3_delta)
        self.b3 -= self.lr * np.sum(a3_delta, axis=0, keepdims=True)
        self.w2 -= self.lr * np.dot(self.a1.T, a2_delta)
        self.b2 -= self.lr * np.sum(a2_delta, axis=0)
        self.w1 -= self.lr * np.dot(self.x.T, a1_delta)
        self.b1 -= self.lr * np.sum(a1_delta, axis=0)

    def train(self, epochs, early_stopping_rounds):
        self.losses = []
        no_improvement = 0
        min_loss = np.inf

        for x in range(epochs):
            self.epoch = x
            self.feedforward()
            self.backprop()
            
            self.losses.append(self.loss)
            if x % 100 == 0:
                print(f"Loss at epoch {x+1}: ", self.loss)

            if self.loss < min_loss:
                min_loss = self.loss
                no_improvement = 0
            else:
                no_improvement += 1

            if no_improvement >= early_stopping_rounds:
                print(f"Early stopping triggered at epoch: {x+1}")
                break


    def predict(self, data):
        self.x = data.reshape(data.shape[0], -1) # Reshape data to be 2D
        self.feedforward()
        return self.a3.argmax(axis=1) # Return the predicted class for each image

    
train_y_onehot = np.eye(int(np.max(train_y) + 1))[train_y.astype(int)]  # one-hot encode the labels: (200, 2)
model = NeuralNet(train_x / 255, train_y_onehot)  # initializing neural network with pixels scaled to [0, 1]
model.train(epochs=500, early_stopping_rounds=30)


# Let's test!

predictions = model.predict(test_x / 255)  # predict the class for each test image, scaled like the training data
print(predictions)
accuracy = np.mean(predictions == test_y) # Calculate the accuracy of the predictions
print(f'The accuracy of the model on the test set is {accuracy * 100}%')

The output after running the # MY NEURAL NETWORK! cell is as below.

[[-1.80046772 -1.32989811  1.71147423 ... -0.09803887  0.03584923
   2.12074011]
 [-0.05272664  0.59007847 -0.13174816 ... -1.11398221  1.53315098
  -0.79629237]
 [ 0.56393492  0.76091384  0.91743199 ...  3.1121818  -1.50333057
  -0.34857296]
 ...
 [ 2.05895307  0.423212   -0.32618338 ... -0.33377168 -0.85285953
  -0.14190168]
 [-0.96296542 -0.18003826  0.49210307 ... -1.0200496   1.04274194
  -1.08375463]
 [ 0.84962868 -1.07309205 -0.81095434 ... -0.69421811  0.08447456
  -0.06663053]]
Loss #0 : 8.784362175272287
Loss at epoch 1:  8.784362175272287
Loss #1 : 7.2531430979312495
Loss #2 : 8.86495265302708
Loss #3 : 7.2531430979312495
Loss #4 : 8.86495265302708
Loss #5 : 1.3905449591113785
Loss #6 : 0.8437534904161962
Loss #7 : 0.6921630745011662
Loss #8 : 0.6881406917462543
Loss #9 : 0.6881388139087765
Loss #10 : 0.688138813713608
Loss #11 : 0.6881388137135884
Loss #12 : 0.6881388137135886
Loss #13 : 0.6881388137135883
Loss #14 : 0.6881388137135884
Loss #15 : 0.6881388137135884
Loss #16 : 0.6881388137135884
Loss #17 : 0.6881388137135886
Loss #18 : 0.6881388137135886
Loss #19 : 0.6881388137135886
Loss #20 : 0.6881388137135886
Loss #21 : 0.6881388137135886
Loss #22 : 0.6881388137135886
Loss #23 : 0.6881388137135886
Loss #24 : 0.6881388137135886
Loss #25 : 0.6881388137135886
Loss #26 : 0.6881388137135886
Loss #27 : 0.6881388137135886
Loss #28 : 0.6881388137135886
Loss #29 : 0.6881388137135886
Loss #30 : 0.6881388137135886
Loss #31 : 0.6881388137135886
Loss #32 : 0.6881388137135886
Loss #33 : 0.6881388137135886
Loss #34 : 0.6881388137135886
Loss #35 : 0.6881388137135886
Loss #36 : 0.6881388137135886
Loss #37 : 0.6881388137135886
Loss #38 : 0.6881388137135886
Loss #39 : 0.6881388137135886
Loss #40 : 0.6881388137135886
Loss #41 : 0.6881388137135886
Loss #42 : 0.6881388137135886
Loss #43 : 0.6881388137135886
Early stopping triggered at epoch: 44


And after running the # Let's test! cell, the result is as below.

[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0]
The accuracy of the model on the test set is 57.14285714285714%
