I'm trying to build a siamese neural network for human pose estimation based on Hash matching.
The basic concept and references are described in my previous post.
I fixed the "no gradients provided for any variable" error, but found that the model's loss does not improve no matter how long I train.
I saved checkpoints at the 100th, 10000th and 500000th iterations, and the restored models all show a loss of the same magnitude.
Currently I suspect two possible causes of this problem:
1. Bad loss function design:
The design is borrowed from OpenPose: the loss is the difference between the "distance map" and the "label map".
Unlike the OpenPose project, the "distance map" here is obtained by computing Hamming distances to the "Hint Hash". This process contains a lot of unconventional operations and binarization, which may make the model untrainable (a small sketch of what I mean is below this list).
2. Wrong training loop design:
During training, I found that every saved checkpoint file was named "XXXX.ckpt.data-00000-of-00001"; the XXXX part changes, but the suffix "00000-of-00001" keeps reappearing.
I suspect something is wrong in my model or training loop that causes it to keep repeating the first step of training (a small check I plan to run is sketched after the training code below).
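To make point 1 concrete, here is a minimal NumPy sketch of the idea behind one "distance map": the per-location Hamming distance between a single hint hash and a grid of binarized sample hashes. The shapes and the divide-by-2 normalization are simplified for illustration and are not my exact graph:
import numpy as np

# one 32-bit hint hash and a 76x76 grid of 32-bit sample hashes,
# both with values in {-1, +1} (illustrative shapes only)
hint_hash = np.sign(np.random.randn(32))
sample_hashes = np.sign(np.random.randn(76, 76, 32))

# with {-1, +1} codes, |a - b| is 0 where the bits agree and 2 where they differ,
# so summing over the code axis and dividing by 2 gives the Hamming distance
distance_map = np.sum(np.abs(sample_hashes - hint_hash), axis=-1) / 2
print(distance_map.shape)   # (76, 76), each value in 0..32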
I'm still trying to find resources to fix this problem; any comments would help a lot!
Here's the code:
import tensorflow as tf
import numpy as np
import time
from imageLoader import getPaddedROI,training_data_feeder
import math
import cv2
tf.reset_default_graph()
def truncated_normal_var(name,shape,dtype):
return(tf.get_variable(name=name, shape=shape, dtype=dtype, initializer=tf.truncated_normal_initializer(stddev=0.01)))
def zero_var(name,shape,dtype):
return(tf.get_variable(name=name, shape=shape, dtype=dtype, initializer=tf.constant_initializer(0.0)))
roi_size = 23
image_input_size = 301
#input placeholders
#batch1 hints
inputs_b1h1 = tf.placeholder(tf.float32, ( 16, roi_size, roi_size, 3), name='inputs_b1h1')
#inputs_b1h2 = tf.placeholder(tf.float32, ( 16, roi_size, roi_size, 3), name='inputs_b1h2')
inputs_s = tf.placeholder(tf.float32, (None, image_input_size, image_input_size, 3), name='inputs_s')
labels = tf.placeholder(tf.float32,(16,76,76), name='labels')
#define the model
def paraNet(inputs, inputs_s , ground_truth_labels ):
with tf.variable_scope('conv'):
out_l1 = tf.layers.conv2d(inputs, 16, [3, 3],strides=(2, 2), padding ='valid' ,name='para_conv_1')
out_l1r = tf.nn.relu(out_l1)
out_l2 = tf.layers.conv2d(out_l1r, 48, [3, 3],strides=(2, 2), padding ='valid' ,name='para_conv_2')
out_l2r = tf.nn.relu(out_l2)
out_l3 = tf.layers.conv2d(out_l2r, 96, [5, 5],strides=(1, 1), padding ='valid' ,name='para_conv_3')
out_l3r = tf.nn.relu(out_l3)
out_l4 = tf.layers.conv2d(out_l3r, 32, [1, 1],strides=(1, 1), padding ='valid' ,name='para_conv_4')
hint = tf.squeeze( tf.sign( tf.sigmoid(out_l4) ) )
with tf.variable_scope('conv', reuse=tf.AUTO_REUSE ):
out_2_l1 = tf.layers.conv2d(inputs_s, 16, [3, 3],strides=(2, 2), padding ='same' ,name='para_conv_1')
out_2_l1r = tf.nn.relu(out_2_l1)
out_2_l2 = tf.layers.conv2d(out_2_l1r, 48, [3, 3],strides=(2, 2), padding ='same' ,name='para_conv_2')
out_2_l2r = tf.nn.relu(out_2_l2)
out_2_l3 = tf.layers.conv2d(out_2_l2r, 96, [5, 5],strides=(1, 1), padding ='same' ,name='para_conv_3')
out_2_l3r = tf.nn.relu(out_2_l3)
out_2_l4 = tf.layers.conv2d(out_2_l3r, 32, [1, 1],strides=(1, 1), padding ='same' ,name='para_conv_4')
sample =tf.sign( tf.sigmoid(out_2_l4))
map0 = tf.reduce_sum ( tf.abs (tf.subtract( hint[0] , sample ) ) , axis=3 )
map1 = tf.reduce_sum ( tf.abs (tf.subtract( hint[1] , sample ) ) , axis=3 )
map2 = tf.reduce_sum ( tf.abs (tf.subtract( hint[2] , sample ) ) , axis=3 )
map3 = tf.reduce_sum ( tf.abs (tf.subtract( hint[3] , sample ) ) , axis=3 )
map4 = tf.reduce_sum ( tf.abs (tf.subtract( hint[4] , sample ) ) , axis=3 )
map5 = tf.reduce_sum ( tf.abs (tf.subtract( hint[5] , sample ) ) , axis=3 )
map6 = tf.reduce_sum ( tf.abs (tf.subtract( hint[6] , sample ) ) , axis=3 )
map7 = tf.reduce_sum ( tf.abs (tf.subtract( hint[7] , sample ) ) , axis=3 )
map8 = tf.reduce_sum ( tf.abs (tf.subtract( hint[8] , sample ) ) , axis=3 )
map9 = tf.reduce_sum ( tf.abs (tf.subtract( hint[9] , sample ) ) , axis=3 )
map10 = tf.reduce_sum ( tf.abs (tf.subtract( hint[10] , sample ) ) , axis=3 )
map11 = tf.reduce_sum ( tf.abs (tf.subtract( hint[11] , sample ) ) , axis=3 )
map12 = tf.reduce_sum ( tf.abs (tf.subtract( hint[12] , sample ) ) , axis=3 )
map13 = tf.reduce_sum ( tf.abs (tf.subtract( hint[13] , sample ) ) , axis=3 )
map14 = tf.reduce_sum ( tf.abs (tf.subtract( hint[14] , sample ) ) , axis=3 )
map15 = tf.reduce_sum ( tf.abs (tf.subtract( hint[15] , sample ) ) , axis=3 )
totoal_map =tf.div( tf.concat([map0, map1, map2, map3, map4, map5, map6, map7,
map8, map9, map10,map11,map12, map13, map14, map15], 0) , 64)
loss = tf.nn.l2_loss( totoal_map - ground_truth_labels , name = 'loss' )
return loss, totoal_map
loss, totoal_map = paraNet(inputs_b1h1, inputs_s, labels)
train_step = tf.train.GradientDescentOptimizer(0.1).minimize(loss)
init = tf.global_variables_initializer()
saver = tf.train.Saver()
with tf.Session() as sess:
#writer = tf.summary.FileWriter("./variable_graph",graph = sess.graph)
sess.run(init)
#load image from dataset(train set)
joint_data_path = "./custom_data.json"
train_val_path = "./train_val_indices.json"
imgpath = "./000/"
input_size = 301
hint_roi_size = 23
#saver.restore(sess, "./temp_model/model5.ckpt")
for i in range(5000):
#load data
hintSet01,hintSet02,t_img,t_label_norm = training_data_feeder(joint_data_path, train_val_path, imgpath, input_size, hint_roi_size )
#Normalize the image pixel values to 0~1
hintSet01_norm = []
hintSet02_norm = []
t_img =[ np.float32(t_img /255.0) ]
for rois in hintSet01:
tmp = np.float32(rois / 255.0)
hintSet01_norm.append(tmp.tolist())
for rois in hintSet02:
tmp = np.float32(rois / 255.0)
hintSet02_norm.append(tmp.tolist())
loss_val, _ = sess.run([loss, train_step] ,
feed_dict = {inputs_s: t_img,
inputs_b1h1: hintSet01_norm,
labels: t_label_norm })
if i % 50 == 0:
print(loss_val)
#save_path = saver.save(sess, "./temp_model/model" + '5' + ".ckpt")
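To check the suspicion in point 2, I plan to compare one variable across two of the saved checkpoints with tf.train.NewCheckpointReader and see whether the weights move at all. This is only a sketch; the checkpoint paths below are placeholders for the files I actually saved:
# compare a single variable between two checkpoints (paths are placeholders)
reader_a = tf.train.NewCheckpointReader("./temp_model/model_iter100.ckpt")
reader_b = tf.train.NewCheckpointReader("./temp_model/model_iter10000.ckpt")
var_name = sorted(reader_a.get_variable_to_shape_map().keys())[0]
w_a = reader_a.get_tensor(var_name)
w_b = reader_b.get_tensor(var_name)
print(var_name, np.max(np.abs(w_a - w_b)))   # stays ~0 if the weights never change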
Here is the GitHub repo, and the dataset links are on GitHub here.
2018.11.9 Update:
I borrowed a binarization method from "Binarized Neural Network".
Here's the code:
from tensorflow.python.framework import ops

def ste_binarize( value ):
    """
    Binarize a tensor using the straight-through estimator (STE) for the gradient.
    The gradient of tf.sign(x) is zero almost everywhere, so this uses
    g.gradient_override_map({"Sign" : "Identity"})
    to replace the gradient of tf.sign(x) with the gradient of x itself (the identity).
    """
    g = tf.get_default_graph()
    with ops.name_scope("Binarized") as name:
        with g.gradient_override_map({"Sign" : "Identity"}):
            return tf.sign(value)
This method works well in the original project on GitHub. I replaced the original tf.sign() with this method and tried training again. Sadly, it didn't work in my case. I'll change the structure of my model and try to record the gradient values; roughly what I have in mind is sketched below. If I make any progress, I'll update this post.
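For recording the gradient values, the plan is: build tf.gradients() of the loss with respect to the trainable variables once, then run them inside the training loop with the same feed_dict as the train step and print their magnitudes. This is only an outline against the first model above, not tested code:
# rough sketch: inspect gradient magnitudes for the first model above
train_vars = tf.trainable_variables()
grads = tf.gradients(loss, train_vars)
# keep only the variables that actually receive a gradient
pairs = [(v, g) for v, g in zip(train_vars, grads) if g is not None]

# inside the training loop, with the same feed_dict as train_step:
grad_vals = sess.run([g for _, g in pairs],
                     feed_dict={inputs_s: t_img,
                                inputs_b1h1: hintSet01_norm,
                                labels: t_label_norm})
for (v, _), g_val in zip(pairs, grad_vals):
    print(v.name, np.abs(g_val).max())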
2018.11.22 Update: I changed the model structure and trained it again. This time I discovered a strange behavior of the loss value: while training, the loss actually went up and then froze at a certain value.
Here's the new model:
import tensorflow as tf
from tensorflow.python.framework import ops
from tensorflow.python.platform import gfile
from progress.bar import Bar
import numpy as np
import time
from datetime import datetime
import math
import matplotlib.pyplot as plt
import cv2
from imageLoader import getPaddedROI,training_batch_generator
tf.reset_default_graph()
#load image from dataset(train set)
joint_data_path = "./custom_data.json"
train_val_path = "./train_val_indices.json"
imgpath = "./000/"
input_size = 301
hint_roi_size = 23
roi_size = 23
image_input_size = 301
batch_number =10
def truncated_normal_var(name,shape,dtype):
return(tf.get_variable(name=name, shape=shape, dtype=dtype, initializer=tf.truncated_normal_initializer(stddev=0.01)))
def zero_var(name,shape,dtype):
return(tf.get_variable(name=name, shape=shape, dtype=dtype, initializer=tf.constant_initializer(0.0)))
def ste_binarize( value ):
    """
    Binarize a tensor using the straight-through estimator (STE) for the gradient.
    The gradient of tf.sign(x) is zero almost everywhere, so this uses
    g.gradient_override_map({"Sign" : "Identity"})
    to replace the gradient of tf.sign(x) with the gradient of x itself (the identity).
    """
    g = tf.get_default_graph()
    with ops.name_scope("Binarized") as name:
        with g.gradient_override_map({"Sign" : "Identity"}):
            return tf.sign(value)
#define the model
def paraNet(hint_inputs, sample_inputs):
def paraConv(inputs):
out_l1 = tf.layers.conv2d(inputs , 16, [3, 3],strides=(2, 2), padding ='valid' ,name='para_conv_1')
out_l1r = tf.nn.relu(out_l1)
out_l2 = tf.layers.conv2d(out_l1r, 48, [3, 3],strides=(2, 2), padding ='valid' ,name='para_conv_2')
out_l2r = tf.nn.relu(out_l2)
out_l3 = tf.layers.conv2d(out_l2r, 96, [5, 5],strides=(1, 1), padding ='valid' ,name='para_conv_3')
out_l3r = tf.nn.relu(out_l3)
out_l4 = tf.layers.conv2d(out_l3r, 32, [1, 1],strides=(1, 1), padding ='valid' ,name='para_conv_4')
hint =tf.reshape( tf.squeeze( ste_binarize( out_l4 ) ), [batch_number, 1 , 1 , 32] )
return hint
with tf.variable_scope('conv'):
hint00 = paraConv(hint_inputs[:,0,:,:,:])
with tf.variable_scope('conv', reuse= True ):
hint01 = paraConv(hint_inputs[:,1,:,:,:])
hint02 = paraConv(hint_inputs[:,2,:,:,:])
hint03 = paraConv(hint_inputs[:,3,:,:,:])
hint04 = paraConv(hint_inputs[:,4,:,:,:])
hint05 = paraConv(hint_inputs[:,5,:,:,:])
hint06 = paraConv(hint_inputs[:,6,:,:,:])
hint07 = paraConv(hint_inputs[:,7,:,:,:])
hint08 = paraConv(hint_inputs[:,8,:,:,:])
hint09 = paraConv(hint_inputs[:,9,:,:,:])
hint10 = paraConv(hint_inputs[:,10,:,:,:])
hint11 = paraConv(hint_inputs[:,11,:,:,:])
hint12 = paraConv(hint_inputs[:,12,:,:,:])
hint13 = paraConv(hint_inputs[:,13,:,:,:])
hint14 = paraConv(hint_inputs[:,14,:,:,:])
hint15 = paraConv(hint_inputs[:,15,:,:,:])
out_2_l1 = tf.layers.conv2d(sample_inputs, 16, [3, 3],strides=(2, 2), padding ='same' ,name='para_conv_1')
out_2_l1r = tf.nn.relu(out_2_l1)
out_2_l2 = tf.layers.conv2d(out_2_l1r, 48, [3, 3],strides=(2, 2), padding ='same' ,name='para_conv_2')
out_2_l2r = tf.nn.relu(out_2_l2)
out_2_l3 = tf.layers.conv2d(out_2_l2r, 96, [5, 5],strides=(1, 1), padding ='same' ,name='para_conv_3')
out_2_l3r = tf.nn.relu(out_2_l3)
out_2_l4 = tf.layers.conv2d(out_2_l3r, 32, [1, 1],strides=(1, 1), padding ='same' ,name='para_conv_4')
#sample =tf.sign( tf.sigmoid(out_2_l4))
sample =ste_binarize( out_2_l4)
#originalMap = tf.reduce_sum ( tf.abs (tf.subtract( hint00 , sample ) ) , axis=3 )
map0 = tf.reshape( tf.reduce_sum ( tf.abs (tf.subtract( hint00 , sample ) ) , axis=3 ), [batch_number, 1, 76, 76] )
map1 = tf.reshape( tf.reduce_sum ( tf.abs (tf.subtract( hint01 , sample ) ) , axis=3 ), [batch_number, 1, 76, 76] )
map2 = tf.reshape( tf.reduce_sum ( tf.abs (tf.subtract( hint02 , sample ) ) , axis=3 ), [batch_number, 1, 76, 76] )
map3 = tf.reshape( tf.reduce_sum ( tf.abs (tf.subtract( hint03 , sample ) ) , axis=3 ), [batch_number, 1, 76, 76] )
map4 = tf.reshape( tf.reduce_sum ( tf.abs (tf.subtract( hint04 , sample ) ) , axis=3 ), [batch_number, 1, 76, 76] )
map5 = tf.reshape( tf.reduce_sum ( tf.abs (tf.subtract( hint05 , sample ) ) , axis=3 ), [batch_number, 1, 76, 76] )
map6 = tf.reshape( tf.reduce_sum ( tf.abs (tf.subtract( hint06 , sample ) ) , axis=3 ), [batch_number, 1, 76, 76] )
map7 = tf.reshape( tf.reduce_sum ( tf.abs (tf.subtract( hint07 , sample ) ) , axis=3 ), [batch_number, 1, 76, 76] )
map8 = tf.reshape( tf.reduce_sum ( tf.abs (tf.subtract( hint08 , sample ) ) , axis=3 ), [batch_number, 1, 76, 76] )
map9 = tf.reshape( tf.reduce_sum ( tf.abs (tf.subtract( hint09 , sample ) ) , axis=3 ), [batch_number, 1, 76, 76] )
map10 = tf.reshape( tf.reduce_sum ( tf.abs (tf.subtract( hint10 , sample ) ) , axis=3 ), [batch_number, 1, 76, 76] )
map11 = tf.reshape( tf.reduce_sum ( tf.abs (tf.subtract( hint11 , sample ) ) , axis=3 ), [batch_number, 1, 76, 76] )
map12 = tf.reshape( tf.reduce_sum ( tf.abs (tf.subtract( hint12 , sample ) ) , axis=3 ), [batch_number, 1, 76, 76] )
map13 = tf.reshape( tf.reduce_sum ( tf.abs (tf.subtract( hint13 , sample ) ) , axis=3 ), [batch_number, 1, 76, 76] )
map14 = tf.reshape( tf.reduce_sum ( tf.abs (tf.subtract( hint14 , sample ) ) , axis=3 ), [batch_number, 1, 76, 76] )
map15 = tf.reshape( tf.reduce_sum ( tf.abs (tf.subtract( hint15 , sample ) ) , axis=3 ), [batch_number, 1, 76, 76] )
totoal_map =tf.div( tf.concat([map0, map1, map2, map3, map4, map5, map6, map7,
map8, map9, map10,map11,map12, map13, map14, map15], 1) , 32)
return totoal_map
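#shape notes: with 'valid' convolutions the hint branch reduces each 23x23 ROI to a 1x1x32 hash
#(23 -> 11 -> 5 -> 1), and with 'same' padding the sample branch reduces the 301x301 image to a
#76x76x32 map (301 -> 151 -> 76), so each label map below is 16 x 76 x 76 per sample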
inputs_b1h1 = tf.placeholder(tf.float32, ( None, 16, roi_size, roi_size, 3), name='inputs_b1h1')
inputs_s = tf.placeholder(tf.float32, (None, image_input_size, image_input_size, 3), name='inputs_s')
ground_truth_labels = tf.placeholder(tf.float32,(None, 16,76,76), name='labels')
mtotoal_map = paraNet(inputs_b1h1 , inputs_s )
mloss = tf.nn.l2_loss( ground_truth_labels - mtotoal_map, name = 'loss' )
train_step = tf.train.GradientDescentOptimizer(0.0001).minimize(mloss)
init = tf.global_variables_initializer()
saver = tf.train.Saver()
with tf.Session() as sess:
#writer = tf.summary.FileWriter("./variable_graph",graph = sess.graph)
sess.run(init)
#saver.restore(sess, "./temp_model/model7.ckpt")
loss_per_iteration = []
for i in range(100):
#load data
hintSet01,hintSet02,t_img,t_label_norm = training_batch_generator(joint_data_path, train_val_path, imgpath, input_size, hint_roi_size,batch_number)
loss_val, _ = sess.run([mloss, train_step] ,
feed_dict = {inputs_s: t_img,
inputs_b1h1: hintSet01,
ground_truth_labels: t_label_norm })
loss_per_iteration.append(loss_val)
if i % 5 == 0:
print(loss_val)
plt.plot(loss_per_iteration)
plt.show()
save_path = saver.save(sess, "./temp_model/model" + '7' + ".ckpt")