I'm new to TensorFlow and I'm trying to implement an LSTM encoder-decoder from scratch in TensorFlow, following this blog post: https://medium.com/@shiyan/understanding-lstm-and-its-diagrams-37e2f46f1714
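For reference, these are the standard LSTM cell equations I'm trying to reproduce (the notation is mine; x_t is the current input, h_{t-1} and C_{t-1} are the previous hidden and cell states):

    f_t = \sigma(x_t W_f + h_{t-1} U_f + b_f)
    i_t = \sigma(x_t W_i + h_{t-1} U_i + b_i)
    \tilde{C}_t = \tanh(x_t W_c + h_{t-1} U_c + b_c)
    C_t = f_t \odot C_{t-1} + i_t \odot \tilde{C}_t
    o_t = \sigma(x_t W_o + h_{t-1} U_o + b_o)
    h_t = o_t \odot \tanh(C_t)

where \sigma is the sigmoid and \odot is element-wise multiplication.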
Both the encoder and the decoder are written as loop bodies for tf.while_loop. This is the code for one decoder timestep (decode_timestep):
def decode_timestep(self, context, max_dec, result, C_t_d, H_t_d, current_index):
    with tf.variable_scope('decode_scope', reuse=True):
        W_f_d = tf.get_variable('W_f_d', (self.n_dim, self.n_dim), tf.float32)
        U_f_d = tf.get_variable('U_f_d', (self.n_dim, self.n_dim), tf.float32)
        # ... (the remaining W_*_d, U_*_d and b_*_d variables are fetched the same way)
    # Forget gate
    f_t_d = tf.nn.sigmoid(
        tf.add(tf.reduce_sum(tf.add(tf.matmul(context, W_f_d), tf.matmul(H_t_d, U_f_d))), b_f_d))
    C_t_d_f = tf.multiply(C_t_d, f_t_d)
    # Input gate
    # Part 1. How much the new memory should impact the cell
    i_t_d = tf.nn.sigmoid(
        tf.add(tf.reduce_sum(tf.add(tf.matmul(context, W_i_d), tf.matmul(H_t_d, U_i_d))), b_i_d))
    # Part 2. Calculate the new (candidate) memory
    c_d_new = tf.nn.tanh(
        tf.add(tf.reduce_sum(tf.add(tf.matmul(context, W_c_d), tf.matmul(H_t_d, U_c_d))), b_c_d))
    # Part 3. Update the old memory
    C_t_d_i = tf.add(C_t_d_f, tf.multiply(i_t_d, c_d_new))
    # Output gate
    o_t_d = tf.nn.sigmoid(
        tf.add(tf.reduce_sum(tf.add(tf.matmul(context, W_o_d), tf.matmul(H_t_d, U_o_d))), b_o_d))
    # Calculate the new H_t
    H_t_d_new = tf.multiply(o_t_d, tf.nn.tanh(C_t_d))
    # Write the result of this timestep to the TensorArray at position current_index
    result.write(tf.subtract(tf.subtract(max_dec, 1), current_index), H_t_d_new)
    # Decrement current_index by 1
    index_next = tf.subtract(current_index, 1)
    return context, max_dec, result, C_t_d_i, H_t_d_new, index_next
The encoder's encode_timestep is built the same way, with _e variables, except that it runs over the input sentence instead of the context and does not use the result TensorArray.
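I create the weights once elsewhere in the model and fetch them inside the loop body with reuse=True. A minimal sketch of that create/reuse pattern, the way I understand it (the names, shapes and the n_dim value below are just for illustration, not my real model):

import tensorflow as tf  # TF 1.x

n_dim = 4  # illustrative size only

# Create the decoder weights once (e.g. when the model is constructed)
with tf.variable_scope('decode_scope'):
    W_f_d = tf.get_variable('W_f_d', (n_dim, n_dim), tf.float32)
    U_f_d = tf.get_variable('U_f_d', (n_dim, n_dim), tf.float32)
    b_f_d = tf.get_variable('b_f_d', (n_dim,), tf.float32)

# Later (e.g. inside the loop body) fetch the same variables again
with tf.variable_scope('decode_scope', reuse=True):
    W_f_d_again = tf.get_variable('W_f_d', (n_dim, n_dim), tf.float32)

assert W_f_d is W_f_d_again  # reuse=True returns the existing variable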
And this is the code where I put everything together (the two while loops, the loss, and the training op):
with tf.variable_scope('encode_scope', reuse=True):
    H_t_e = tf.get_variable('H_t_e')
    C_t_e = tf.get_variable('C_t_e')

with tf.variable_scope('global_scope', reuse=True):
    current_index = tf.get_variable('current_index', dtype=tf.int32)

# Run the encoder over the input sentence
context, _, C_t_e, H_t_e, current_index = tf.while_loop(
    self.encode_timestep_cond, self.encode_timestep,
    [X_Sent, max_enc, C_t_e, H_t_e, current_index])

# Run the decoder, collecting its outputs in a TensorArray
result = tf.TensorArray(tf.float32, size=max_dec)
_, _, result, C_t_e, H_t_e, current_index = tf.while_loop(
    self.decode_timestep_cond, self.decode_timestep,
    [context, max_dec, result, C_t_e, H_t_e, current_index])

loss = tf.reduce_sum(tf.sqrt(tf.reduce_sum(
    tf.square(tf.subtract(result.concat(), Y_Sent)), reduction_indices=1)))
train_step = tf.train.AdamOptimizer().minimize(loss)
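For what it's worth, this is the bare-bones tf.while_loop + TensorArray pattern (TF 1.x) that I understood I should be following; it's a toy example of my own with made-up values, not my real code:

import tensorflow as tf  # TF 1.x

def cond(i, ta):
    return i < 5

def body(i, ta):
    # TensorArray.write returns a new TensorArray object; it has to be
    # returned as a loop variable so the write is part of the loop's state
    ta = ta.write(i, tf.cast(i, tf.float32))
    return i + 1, ta

i0 = tf.constant(0)
ta0 = tf.TensorArray(tf.float32, size=5)
_, ta_final = tf.while_loop(cond, body, [i0, ta0])
outputs = ta_final.stack()  # tensor of shape (5,)

with tf.Session() as sess:
    print(sess.run(outputs))  # [0. 1. 2. 3. 4.]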
The error I get is:
ValueError: No gradients provided for any variable, check your graph for ops that do not support gradients, between variables ["", ...
Please help! This is the implementation that I think should be right, based on what I read and understood from that post. Thank you, and sorry about my English!