import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
input = [[0,0,1],[0,1,1],[1,0,1],[1,1,1]]
output = [0,1,1,0]
N = np.size(input,0) # number of samples
Ni = np.size(input,1) # dimension of the samples of input
No = 1 # dimension of the sample of output
Nh = 10 # number of hidden units
Ws = 1/4*np.random.rand(Nh,Ni+1)
print(Ws)
Wo = 1/4*np.random.rand(No,Nh)
print(Wo)
alpha = 0.05 # Learning rate
t_ = []
loss_ = []
def ReLU(x):
return np.maximum(0,x)
def sigmoid(x):
return 1/(1+np.exp(-x))
## train the model ====================================================================
for epoch in range(0,3000):
loss = 0
for id_ in range(0,N):
dWs = 0*Ws
dWo = 0*Wo
x = np.append(input[id_],1)
Z_1 = np.dot(Ws,x)
Z_2 = np.dot(Wo,ReLU(Z_1))
y = sigmoid(Z_2)
d = output[id_]
for j in range(0,Nh):
for i in range(0,No):
if Z_1[j] >= 0:
dWo[i,j] = dWo[i,j] + (y[i]-d)*Z_1[j]
#dWo[i,j] = dWo[i,j] + sigmoid(Z_1[j])*(y[i]-d)
else:
dWo[i,j] += 0
Wo = Wo - alpha*dWo
for k in range(0,Ni+1):
for j in range(0,Nh):
for i in range(0,No):
if Z_1[j] >= 0:
dWs[j,k] = dWs[j,k] + x[k]*Wo[i,j]*(y[i]-d)
#dWs[j,k] = dWs[j,k] + x[k]*Wo[i,j]*sigmoid(Z_1[j])*(1-sigmoid(Z_1[j]))*(y[i]-d)
else:
dWs[j,k] += 0
Ws = Ws - alpha*dWs
loss = loss + 1/2*np.linalg.norm(y-d)
if np.mod(epoch,50) == 0:
print(epoch,"-th epoch trained")
t_ = np.append(t_,epoch)
loss_ = np.append(loss_,loss)
fig = plt.figure(num=0,figsize=[10,5])
plt.plot(t_,loss_,marker="")
plt.title('Loss decay')
plt.xlabel('epoch',FontSize=20)
plt.ylabel('Loss',FontSize=20)
plt.show()
## figure out the function shape the model==========================================
xn = np.linspace(0,1,20)
yn = np.linspace(0,1,20)
xm, ym = np.meshgrid(xn, yn)
xx = np.reshape(xm,np.size(xm,0)*np.size(xm,1))
yy = np.reshape(ym,np.size(xm,0)*np.size(xm,1))
Z = []
for id__ in range(0,np.size(xm)):
x = np.append([xx[id__],yy[id__]],[1,1])
Z_1 = np.dot(Ws,x)
y_ = sigmoid(np.dot(Wo,ReLU(Z_1)))
Z = np.append(Z,y_)
fig = plt.figure(num=1,figsize=[10,5])
ax = fig.gca(projection='3d')
surf = ax.plot_surface(xm,ym,np.reshape(Z,(np.size(xm,0),np.size(xm,1))),cmap='coolwarm',linewidth=0,antialiased=False)
print("====================================================================")
plt.show()
## test the trained model ====================================================================
for id_ in range(0,N):
x = np.append(input[id_],1)
Z_1 = np.dot(Ws,x)
y = sigmoid(np.dot(Wo,ReLU(Z_1)))
print(y)
If I try this with sigmoid function, it works fine but when the ReLU activation function is implemented, the the program doesn't learning anything.
The NN consist of 3 input, hidden, output layers and sigmoid activation fuction is implemented for output function. Hand calculation seems fine but can't find the flaw.
The code below with sigmoid activation function works just fine.
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
input = [[0,0,1],[0,1,1],[1,0,1],[1,1,1]]
output = [0,1,1,0]
N = np.size(input,0) # number of samples
Ni = np.size(input,1) # dimension of the samples of input
No = 1 # dimension of the sample of output
Nh = 5 # number of hidden units
Ws = 1/4*np.random.rand(Nh,Ni+1)
#print(Ws)
Wo = 1/4*np.random.rand(No,Nh)
#print(Wo)
alpha = 0.1 # Learning rate
t_ = []
loss_ = []
def sigmoid(x):
return 1/(1+np.exp(-x))
## train the model ====================================================================
for epoch in range(0,5000):
loss = 0
for id_ in range(0,N):
dWs = 0*Ws
dWo = 0*Wo
x = np.append(input[id_],1)
Z_1 = np.dot(Ws,x)
A_1 = sigmoid(Z_1)
Z_2 = np.dot(Wo,A_1)
y = sigmoid(Z_2)
d = output[id_]
for j in range(0,Nh):
for i in range(0,No):
dWo[i,j] = dWo[i,j] + sigmoid(Z_1[j])*(y[i]-d)
Wo = Wo - alpha*dWo
for k in range(0,Ni+1):
for j in range(0,Nh):
for i in range(0,No):
dWs[j,k] = dWs[j,k] + x[k]*Wo[i,j]*sigmoid(Z_1[j])*(1-sigmoid(Z_1[j]))*(y[i]-d)
Ws = Ws - alpha*dWs
loss = loss + 1/2*np.linalg.norm(y-d)
if np.mod(epoch,50) == 0:
print(epoch,"-th epoch trained")
t_ = np.append(t_,epoch)
loss_ = np.append(loss_,loss)
fig = plt.figure(num=0,figsize=[10,5])
plt.plot(t_,loss_,marker="")
plt.title('Loss decay')
plt.xlabel('epoch',FontSize=20)
plt.ylabel('Loss',FontSize=20)
plt.show()
## figure out the function shape the model==========================================
xn = np.linspace(0,1,20)
yn = np.linspace(0,1,20)
xm, ym = np.meshgrid(xn, yn)
xx = np.reshape(xm,np.size(xm,0)*np.size(xm,1))
yy = np.reshape(ym,np.size(xm,0)*np.size(xm,1))
Z = []
for id__ in range(0,np.size(xm)):
x = np.append([xx[id__],yy[id__]],[1,1])
Z_1 = np.dot(Ws,x)
y_ = sigmoid(np.dot(Wo,sigmoid(Z_1)))
Z = np.append(Z,y_)
fig = plt.figure(num=1,figsize=[10,5])
ax = fig.gca(projection='3d')
surf = ax.plot_surface(xm,ym,np.reshape(Z,(np.size(xm,0),np.size(xm,1))),cmap='coolwarm',linewidth=0,antialiased=False)
print("====================================================================")
plt.show()
## test the trained model ====================================================================
for id_ in range(0,N):
x = np.append(input[id_],1)
Z_1 = np.dot(Ws,x)
y = sigmoid(np.dot(Wo,sigmoid(Z_1)))
print(y)