Hello Im putting my code for inspection because Ive been playing with my neural network implementation in python for a few weeks and I cant seem to reach a misclasification error below 17% some times 16%. ve been trying different learning rate values, different hidden neurons number and still not a lot of mprovement. Im well aware my implementation is the basic traditional neural network, but I was expecting better results according to other implementations ive seen in internet. I hope this is of interest for you guys, it would be really cool if you could point me new ideas about what could be the problem in my code, or maybe you think this is the best I can do with a traditional implementation and I should add something new, that would be cool too.
In any case, here is my code, I hope is readable enough, I tried to do it as simple as possible, since its my way to understand how neural networks work.
Edit:Perhaps my question is not so clear, basically I would like, if its of interest for you guys, is to help me find details in my current implementation that could improve my misclasification error below 17% because apparently its the best my implementation can do. I would be very thankful for any advise or idea, Im deeply interested in this topic, but Im a beginner and it would be great to have some smart ideas that can help me improve my implementation.
File: mnist_dataset.py - Extract the mnist data
import numpy as np
from struct import unpack
train_input_file = open("dataset/train-images-idx3-ubyte", "rb")
train_output_file = open("/dataset/train-labels-idx1-ubyte", "rb")
test_input_file = open("dataset/t10k-images-idx3-ubyte", "rb")
test_output_file = open("dataset/t10k-labels-idx1-ubyte", "rb")
def readData(f,labels = False,scale = 1):
header = hex(unpack('>L',np.fromfile(f,dtype=np.int32,count=1)[0])[0])
num = int(unpack('>L',np.fromfile(f,dtype=np.int32,count=1)[0])[0])
col = 1
row = 1
if labels == False:
row = int(unpack('>L',np.fromfile(f,dtype=np.int32,count=1)[0])[0])
col = int(unpack('>L',np.fromfile(f,dtype=np.int32,count=1)[0])[0])
data = np.zeros((int(num/scale),col*row))
for i in range(0,int(num/scale),1):
data[i] = np.fromfile(f,dtype=np.ubyte,count=col*row)
return data
def getMNISTData():
def norm(v):
return v/255
train_input = readData(train_input_file, scale=1)/255.0
train_out = readData(train_output_file, True,scale=1)
test_input = readData(test_input_file)/255.0
test_out = readData(test_output_file, True)
print "Train input: " + str(train_input.shape)
print "Train output: " + str(train_out.shape)
print "Test input: " + str(test_input.shape)
print "Test output: " + str(test_out.shape)
train_input_file.close()
train_output_file.close()
test_input_file.close()
test_output_file.close()
return (train_input,train_out,test_input,test_out)
File: NN.py - neural network implementation
import mnist_dataset
import numpy as np
import random
import matplotlib.pyplot as plt
def encode_data_10(v):
e = (0.0) * np.ones((1, 10), dtype=float)
e[:, int(v)] = 1.0
return e.tolist()
def encode_data_1(v):
n = -1.0 + ((0.2)*v)
return n
x_train, y_train, x_test, y_test = mnist_dataset.getMNISTData()
learning_rate = 1.0
iter = 3000
sample_size = 30
num_hidden_neurons = 500
num_output_neurons = 10
if num_output_neurons > 1:
y_train = np.matrix(np.array(map(encode_data_10,y_train)))
y_test = np.matrix(np.array(map(encode_data_10,y_test)))
else:
y_train = np.matrix(map(encode_data_1,y_train))
y_test = np.matrix(map(encode_data_1,y_test))
def getSample(sample_size,x,y):
r = random.sample(xrange(1, len(y), 1), sample_size)
x_r = np.zeros((sample_size,x.shape[1]))
y_r = np.zeros((sample_size,y.shape[1]))
for i,n in enumerate(r):
x_r[i] = x[n]
y_r[i] = y[n]
return (x_r,y_r)
inputVector, targetVector = getSample(sample_size, x_train, y_train)
hiddenWeights = np.mat(np.random.random((num_hidden_neurons, x_train.shape[1])))
print "W0 shape: " + str(hiddenWeights.shape)
outputWeights = np.mat(np.random.random((num_output_neurons,num_hidden_neurons)))
print "W1 shape: " + str(outputWeights.shape)
def act_func_l1(a):
return (1.0/(1 + np.exp(-a)))
def der_act_func_l1(a):
return act_func_l1(a)*(1.0 - act_func_l1(a))
def feedforward(l0):
global hiddenWeights
global outputWeights
Z1 = l0 * hiddenWeights.T
layer1 = np.matrix(act_func_l1(np.asarray(Z1)))
Z2 = layer1 * outputWeights.T
layer2 = act_func_l1(np.asarray(Z2))
return (layer1,layer2)
def miss(x,y):
layer1, layer2 = feedforward(x)
def c(n):
if n > 0.5:
return 1.0
else:
return 0.0
layer2 = map(lambda v: map(c, v), layer2)
def cc(t):
return np.abs(cmp(np.array(y[t[0]]).tolist()[0], np.array(t[1]).tolist()))
return (np.sum(map(cc, enumerate(layer2))))
miss_x = np.zeros((iter, 1))
for j in xrange(iter):
hiddenActualInput = inputVector * hiddenWeights.T
hiddenOutputVector = np.matrix(act_func_l1(np.asarray(hiddenActualInput)))
outputActualInput = hiddenOutputVector * outputWeights.T
outputVector = act_func_l1(np.asarray(outputActualInput))
layer2_error2 = np.square(outputVector - targetVector)
print "Error: " + str(np.mean(np.abs(layer2_error2)))
m = miss(x_test,y_test)
miss_x[j] = m
print str(j) + " - Misses (%): " + str(m)
if m <= 2000:
learning_rate = 0.05
outputDelta = np.mat(der_act_func_l1(np.asarray(outputVector))*np.asarray(outputVector - targetVector))
hiddenDelta = np.mat(der_act_func_l1(np.asarray(hiddenOutputVector)) * np.asarray((outputDelta*outputWeights)))
hiddenWeights = np.mat(hiddenWeights.T - (learning_rate*np.asarray(inputVector.T*hiddenDelta))).T
outputWeights = np.mat(outputWeights.T - (learning_rate*np.asarray(hiddenOutputVector.T*outputDelta))).T
inputVector, targetVector = getSample(sample_size, x_train, y_train)
plt.plot(xrange(iter), miss_x, label = 'Miss rate(%)')
plt.legend(loc='upper right')
plt.show()