Task: classify whether an image is an image of a human or not.
Train set: 200 images, labeled in train_y (0 = not human, 1 = human).
Test set: 49 images, labeled in test_y (0 = not human, 1 = human).
Both are 64x64 RGB images.
First, to explain what I intended:
- Flatten each image from (64, 64, 3) into a single row of 64*64*3 values (see the small shape sketch after this list).
- 2 hidden layers
- 1 output layer
- Feed-forward pass with ReLU activations; after the output layer, backpropagation runs.
- Early stopping if the loss doesn't improve for 30 epochs.
- Test on the test set.
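Just to make the intended data layout concrete, here is a minimal sketch of the shapes I mean (dummy arrays only, standing in for train_x / train_y):

```python
import numpy as np

# Dummy data with the same shapes as my train set (illustration only)
dummy_x = np.zeros((200, 64, 64, 3))           # 200 RGB images of 64x64
dummy_y = np.random.randint(0, 2, size=200)    # labels: 0 = not human, 1 = human

flat_x = dummy_x.reshape(dummy_x.shape[0], -1)  # (200, 12288), i.e. 64*64*3 per image
one_hot_y = np.eye(2)[dummy_y]                  # (200, 2) one-hot labels
print(flat_x.shape, one_hot_y.shape)
```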
Accuracy never gets above 60%. Almost every time the model predicts all test labels as 0, so something seems to go wrong during training. My guesses are a) an overfitting problem, or b) the composition of the test set (I put the test set together myself, since originally I only had the training images).
After fixing my problem, if possible I'm also curious how to save the result data as I change parameters (e.g. activation function, number of layers, number of nodes) and make a graph of the loss value and prediction accuracy; something like the sketch right below is what I have in mind.
I'm working in a Jupyter notebook!
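A rough sketch of what I imagine (not sure it's the right approach), assuming the `NeuralNet` class and the loaded data from the code further down; the parameter values here are just placeholders:

```python
# Run the network with different settings, keep each run's loss history and
# test accuracy, then plot the loss curves for comparison.
results = []

for lr in [0.01, 0.001]:  # placeholder parameter sweep
    model = NeuralNet(train_x / 255, np.eye(2)[train_y.astype(int)])
    model.lr = lr
    model.train(epochs=500, early_stopping_rounds=30)
    preds = model.predict(test_x / 255)
    acc = np.mean(preds == test_y)
    results.append({'lr': lr, 'losses': model.losses, 'accuracy': acc})

for r in results:
    plt.plot(r['losses'], label=f"lr={r['lr']}, acc={r['accuracy']:.2f}")
plt.xlabel('epoch')
plt.ylabel('loss')
plt.legend()
plt.show()
```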
```python
# Load train data
import h5py
import os
from PIL import Image
import matplotlib.pyplot as plt
import numpy as np
filename = './train.hdf5'
with h5py.File(filename, 'r') as f:
    for key in f.keys():
        print('Dataset name: ', key)
    train_x = f['train_x'][:]
    train_y = f['train_y'][:]
print(train_x[0][11])
print(len(train_x))
print(train_y)
print(len(train_y))
# Load test data
filename = './test.hdf5'
with h5py.File(filename, 'r') as f:
    for key in f.keys():
        print('Dataset name: ', key)
    test_x = f['test_x'][:]
    test_y = f['test_y'][:]
print(test_x)
print(len(test_x))
print(test_y)
print(len(test_y))
# Just to see whether images are properly loaded.
for i in range(30):
    img = train_x[i]
    if img.shape[-1] == 1:
        img = img.squeeze(axis=-1)
    plt.imshow(img, cmap='gray')
    plt.show()
# MY NEURAL NETWORK!
# ReLU activation function
def relu(X):
    return np.maximum(0, X)

# Derivative of ReLU function
def relu_derivative(X):
    return np.where(X <= 0, 0, 1)

def sigmoid(x):
    return 1 / (1 + np.exp(-x))

def tanh(x):
    return np.tanh(x)
# Softmax function
def softmax(X):
    exps = np.exp(X - np.max(X, axis=1, keepdims=True))
    return exps / np.sum(exps, axis=1, keepdims=True)
#Cross entropy loss
def cross_entropy(y_pred, y_true):
    # Clip values to prevent log(0)
    y_pred = np.clip(y_pred, 1e-9, 1 - 1e-9)
    n_samples = y_true.shape[0]
    logp = -np.log(y_pred[np.arange(n_samples), y_true.argmax(axis=1)])
    loss = np.sum(logp) / n_samples
    return loss

def error(pred, real):
    n_samples = real.shape[0]
    pred = np.clip(pred, 1e-7, 1 - 1e-7)  # Clip to prevent log(0)
    logp = -np.log(pred[np.arange(n_samples), real.argmax(axis=1)])
    loss = np.sum(logp) / n_samples
    return loss
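# Note (added for clarity, illustrative only): `error` above is effectively the
# same as `cross_entropy`. A quick sanity check of either one:
# cross_entropy(np.array([[0.99, 0.01]]), np.array([[1, 0]])) is ~0.01, while a
# 50/50 prediction, cross_entropy(np.array([[0.5, 0.5]]), np.array([[1, 0]])),
# is ~log(2) ~= 0.693.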
class NeuralNet:
    def __init__(self, x, y):
        self.x = x.reshape(x.shape[0], -1)  # (200, 64*64*3)
        self.y = y                          # (200, 2)
        neurons = 128                       # neurons for hidden layers
        self.lr = 0.01                      # learning rate
        ip_dim = self.x.shape[1]            # input layer size
        op_dim = self.y.shape[1]            # output layer size
        self.w1 = np.random.randn(ip_dim, neurons)
        print(self.w1)
        self.b1 = np.zeros((1, neurons))
        self.w2 = np.random.randn(neurons, neurons)
        self.b2 = np.zeros((1, neurons))
        self.w3 = np.random.randn(neurons, op_dim)
        self.b3 = np.zeros((1, op_dim))

    def feedforward(self):
        z1 = np.dot(self.x, self.w1) + self.b1
        self.a1 = relu(z1)
        z2 = np.dot(self.a1, self.w2) + self.b2
        self.a2 = relu(z2)
        z3 = np.dot(self.a2, self.w3) + self.b3
        self.a3 = softmax(z3)

    def backprop(self):
        loss = error(self.a3, self.y)
        print(f'Loss #{self.epoch} :', loss)
        self.loss = loss
        a3_delta = self.a3 - self.y                     # w3
        z2_delta = np.dot(a3_delta, self.w3.T)
        a2_delta = z2_delta * relu_derivative(self.a2)  # w2
        z1_delta = np.dot(a2_delta, self.w2.T)
        a1_delta = z1_delta * relu_derivative(self.a1)  # w1
        self.w3 -= self.lr * np.dot(self.a2.T, a3_delta)
        self.b3 -= self.lr * np.sum(a3_delta, axis=0, keepdims=True)
        self.w2 -= self.lr * np.dot(self.a1.T, a2_delta)
        self.b2 -= self.lr * np.sum(a2_delta, axis=0)
        self.w1 -= self.lr * np.dot(self.x.T, a1_delta)
        self.b1 -= self.lr * np.sum(a1_delta, axis=0)

    def train(self, epochs, early_stopping_rounds):
        self.losses = []
        no_improvement = 0
        min_loss = np.inf
        for x in range(epochs):
            self.epoch = x
            self.feedforward()
            self.backprop()
            self.losses.append(self.loss)
            if x % 100 == 0:
                print(f"Loss at epoch {x+1}: ", self.loss)
            if self.loss < min_loss:
                min_loss = self.loss
                no_improvement = 0
            else:
                no_improvement += 1
            if no_improvement >= early_stopping_rounds:
                print(f"Early stopping triggered at epoch: {x+1}")
                break

    def predict(self, data):
        self.x = data.reshape(data.shape[0], -1)  # Reshape data to be 2D
        self.feedforward()
        return self.a3.argmax(axis=1)  # Return the predicted class for each image
model = NeuralNet(train_x/255, np.eye(int(np.max(train_y)+1))[train_y.astype(int)]) # initializing neural network
model.train(epochs=500, early_stopping_rounds=30)
# Let's test!
predictions = model.predict(test_x/255)  # Predict the class for each test image (normalized the same way as the training data)
print(predictions)
accuracy = np.mean(predictions == test_y) # Calculate the accuracy of the predictions
print(f'The accuracy of the model on the test set is {accuracy * 100}%')
```
The result after running the `# MY NEURAL NETWORK!` cell is below (first the `w1` matrix printed in `__init__`, then the loss per epoch):
```
[[-1.80046772 -1.32989811 1.71147423 ... -0.09803887 0.03584923
2.12074011]
[-0.05272664 0.59007847 -0.13174816 ... -1.11398221 1.53315098
-0.79629237]
[ 0.56393492 0.76091384 0.91743199 ... 3.1121818 -1.50333057
-0.34857296]
...
[ 2.05895307 0.423212 -0.32618338 ... -0.33377168 -0.85285953
-0.14190168]
[-0.96296542 -0.18003826 0.49210307 ... -1.0200496 1.04274194
-1.08375463]
[ 0.84962868 -1.07309205 -0.81095434 ... -0.69421811 0.08447456
-0.06663053]]
Loss #0 : 8.784362175272287
Loss at epoch 1: 8.784362175272287
Loss #1 : 7.2531430979312495
Loss #2 : 8.86495265302708
Loss #3 : 7.2531430979312495
Loss #4 : 8.86495265302708
Loss #5 : 1.3905449591113785
Loss #6 : 0.8437534904161962
Loss #7 : 0.6921630745011662
Loss #8 : 0.6881406917462543
Loss #9 : 0.6881388139087765
Loss #10 : 0.688138813713608
Loss #11 : 0.6881388137135884
Loss #12 : 0.6881388137135886
Loss #13 : 0.6881388137135883
Loss #14 : 0.6881388137135884
Loss #15 : 0.6881388137135884
Loss #16 : 0.6881388137135884
Loss #17 : 0.6881388137135886
Loss #18 : 0.6881388137135886
Loss #19 : 0.6881388137135886
Loss #20 : 0.6881388137135886
Loss #21 : 0.6881388137135886
Loss #22 : 0.6881388137135886
Loss #23 : 0.6881388137135886
Loss #24 : 0.6881388137135886
Loss #25 : 0.6881388137135886
Loss #26 : 0.6881388137135886
Loss #27 : 0.6881388137135886
Loss #28 : 0.6881388137135886
Loss #29 : 0.6881388137135886
Loss #30 : 0.6881388137135886
Loss #31 : 0.6881388137135886
Loss #32 : 0.6881388137135886
Loss #33 : 0.6881388137135886
Loss #34 : 0.6881388137135886
Loss #35 : 0.6881388137135886
Loss #36 : 0.6881388137135886
Loss #37 : 0.6881388137135886
Loss #38 : 0.6881388137135886
Loss #39 : 0.6881388137135886
Loss #40 : 0.6881388137135886
Loss #41 : 0.6881388137135886
Loss #42 : 0.6881388137135886
Loss #43 : 0.6881388137135886
Early stopping triggered at epoch: 44
```
And after running the `# Let's test!` cell, the result is below:
```
[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0 0 0 0 0]
The accuracy of the model on the test set is 57.14285714285714%
```
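One thing I noticed while writing this up: with 49 test images, 57.14% is exactly 28/49, which would be the accuracy of always predicting 0 if 28 of the test labels are "not human". A quick sanity check of that (small sketch, using the already-loaded test_y):

```python
# How many images of each class are in the test set, and what accuracy would
# a model that always predicts 0 get? (Sanity check for the all-zeros output.)
counts = np.bincount(test_y.astype(int))
print('class counts (0 = not human, 1 = human):', counts)
baseline_all_zero = counts[0] / len(test_y)
print(f'always-predict-0 baseline accuracy: {baseline_all_zero * 100:.2f}%')
```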