I recently started to learn deep learning and I tried to write a forward and backward propagation from the beginning but I think there is a problem with my code so I know this is hard to find the problem and you may find it silly to code from the beginning and not using frameworks but help me if you can. I tried this code on MNIST data set and the soft max out for test was 0.1 for all 10 possible answer the code to start W and b:
def p_init(dim1,dim2):
W=np.random.randn(dim2,dim1)*0.01
b=np.zeros((dim2,1))
return W,b
def mp_init(L_dim):
Lnum=len(L_dim)-1
W={}
b={}
for i in range(Lnum):
W[str(i+1)],b[str(i+1)]=p_init(L_dim[i],L_dim[i+1])
return W,b
code for relu and its derevitive:
def relu(z):
return np.maximum(0,z)
def drelu(z):
return np.greater(z, 0).astype(int)
code for sigmoid and its derivative L
def sigmoid(z):
return expit(z)
def dsigmoid(z):
s=sigmoid(z)
return s*(1-s)
code for final layer with softmax:
def softmax(z):
exps=np.exp(z - np.max(z))
return exps/np.sum(exps)
code for froward :
def forward(W,b,X,L_num,activation):
Z={}
Z['0']=np.array([1])
#print("z0")
A={}
A['0']=X
#print("a0")
for i in range (L_num):
Z[str(i+1)]=np.dot(W[str(i+1)],A[str(i)])+b[str(i+1)]
print('Z['+str(i+1)+']=np.dot(W['+str(i+1)+'],A['+str(i)+'])+b['+str(i+1)+']')
if (activation[i]==0):
A[str(i+1)]=sigmoid(Z[str(i+1)])
print('A['+str(i+1)+']=sigmoid(Z['+str(i+1)+'])')
elif (activation[i]==1):
A[str(i+1)]=relu(Z[str(i+1)])
print('A['+str(i+1)+']=relu(Z['+str(i+1)+'])')
elif (activation[i]==2):
A[str(i+1)]=softmax(Z[str(i+1)])
print('A['+str(i+1)+']=softmax(Z['+str(i+1)+'])')
return Z,A
code for backward propagation :(highly suspected for error):
def backward(A,Y,Z,L_num,activation,m):
dW={}
db={}
dZ={}
dZ[str(L_num)]=A[str(L_num)]-Y
print('dZ['+str(L_num)+']=np.subtract(A['+str(L_num)+'],Y)')
for i in range(1,L_num+1):
dW[str(L_num-i+1)]=np.dot(dZ[str(L_num-i+1)],A[str(L_num-i)].T)/m
print('dW['+str(L_num-i+1)+']=np.dot(dZ['+str(L_num-i+1)+'],A['+str(L_num-i)+'].T)/m')
db[str(L_num-i+1)]=np.sum( dZ[str(L_num-i+1)],axis=1,keepdims=True)/m
print('db['+str(L_num-i+1)+']=np.sum( dZ['+str(L_num-i+1)+'],axis=1,keepdims=True)/m')
if (activation[L_num-(i+1)]==0):
#print(i)
dZ[str(L_num-i)]=np.dot(W[str(L_num-i+1)].T,dZ[str(L_num-i+1)])*dsigmoid(Z[str(L_num-i)])
print('dZ['+str(L_num-i)+']=np.dot(W['+str(L_num-i+1)+'].T,dZ['+str(L_num-i+1)+'])*dsigmoid(Z['+str(L_num-i)+'])')
elif (activation[L_num-(i+1)]==1):
#print(i)
dZ[str(L_num-i)]=np.dot(W[str(L_num-i+1)].T,dZ[str(L_num-i+1)])*drelu(Z[str(L_num-i)])
print('dZ['+str(L_num-i)+']=np.dot(W['+str(L_num-i+1)+'].T,dZ['+str(L_num-i+1)+'])*drelu(Z['+str(L_num-i)+'])')
#elif(activation[L_num-(i+1)]==2):there is no need to compute derivative of softmax
#print(i)
#dZ[str(L_num-i)]=np.dot(W[str(L_num-i+1)].T,dZ[str(L_num-i+1)])*dsoftmax(Z[str(L_num-i)])
return dZ,dW,db
code for update
def update(W,b,dW,db,L_num,alpha):
for i in range (L_num):
W[str(i+1)]= W[str(i+1)]-(alpha*dW[str(i+1)])
print('W['+str(i+1)+']= W['+str(i+1)+']-(alpha*dW['+str(i+1)+'])')
b[str(i+1)]= b[str(i+1)]-(alpha*db[str(i+1)])
print('b['+str(i+1)+']= b['+str(i+1)+']-(alpha*db['+str(i+1)+'])')
return W,b
code for cross-entropy :
def cross_entropy(Y,A,m):
S=Y*np.log(A)
L=np.sum(S)/(-1*m)
return L
code for starting W and b and inial parameters :
m=60000####
epoch=500
alpha=0.1
L_num=3
L_dim=np.array([784,500,125,10])
activation=np.array([0,1,2])
code for my oneHotEncoder:
def adjust_y(Y,final_layer):
m=Y.shape[0]
Ya=np.zeros((final_layer,m))
#print(Ya.shape)
for i in range(m):
#print(i)
Ya[int(Y[i])][i]=1#y[i]-1
return Ya
code to import X and Y's:
X1=inp.convert_from_file('TrainX')
X=np.reshape(X1,(60000,784)).T
Y1=inp.convert_from_file('TrainY')
Y1=np.reshape(Y1,(60000,1))
Y=adjust_y(Y1,10)
X2=inp.convert_from_file('TestX')
X2=np.reshape(X2,(10000,784)).T
Y2=inp.convert_from_file('TestY')
Y2=np.reshape(Y2,(10000,1))
code to start training:
for i in range (epoch) :
Z,A=forward(W,b,X,L_num,activation)
#print("Cost"+str(i)+" : "+str(costf(Y,A[str(L_num)],m)))
print("CrossEntropy "+str(i)+" : "+str(cross_entropy(Y,A[str(L_num)],m)))
dZ,dW,db=backward(A,Y,Z,L_num,activation,m)
W,b=update(W,b,dW,db,L_num,alpha)