I have a network that takes two inputs. The first input is passed through a series of convolutional layers before being flattened and concatenated with the second input; the result is then passed through a number of dense layers. The second portion of the network is an actor-critic network, so the network also has two outputs. I'm attempting to perform gradient descent with two distinct losses, one for the actor and one for the critic. However, I'm running into the error: 'No gradients provided for any variable: (['conv2d/kernel:0', 'conv2d/bias:0', 'co'...
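For context, the model is wired up roughly like the sketch below (the input shapes, layer sizes, and action count are illustrative placeholders, not my exact configuration):

import tensorflow as tf
from tensorflow.keras import layers, Model

# Placeholder shapes -- not the exact ones I use.
state_input = layers.Input(shape=(84, 84, 4), name="state")
info_input = layers.Input(shape=(8,), name="additional_info")

# First input goes through the convolutional stack, then is flattened.
x = layers.Conv2D(32, 8, strides=4, activation="relu")(state_input)
x = layers.Conv2D(64, 4, strides=2, activation="relu")(x)
x = layers.Flatten()(x)

# Concatenate with the second input and pass through dense layers.
x = layers.Concatenate()([x, info_input])
x = layers.Dense(256, activation="relu")(x)
x = layers.Dense(128, activation="relu")(x)

# Two output heads: actor (action probabilities) and critic (state value).
actor_out = layers.Dense(5, activation="softmax", name="actor")(x)
critic_out = layers.Dense(1, name="critic")(x)

cnn_actor_critic = Model([state_input, info_input], [actor_out, critic_out])
cnn_actor_critic.compile(optimizer=tf.keras.optimizers.Adam(1e-4))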
The following learn function is responsible for performing the gradient descent:
def learn(self):
    for _ in range(self.n_epochs):
        state_arr, additional_info, action_arr, old_prob_arr, values, reward_arr, _, trades_complete, env_states, batches = self.memory.generate_batches()

        advantage = np.zeros(len(reward_arr), dtype=np.float32)
        for t in range(len(reward_arr)-1):
            a_t = 0  # advantage value at timestep t
            for k in range(t, len(reward_arr)-1):
                a_t += (reward_arr[k] + values[k+1] * (1-int(trades_complete[k])) - values[k])
            advantage[t] = a_t

        for batch in batches:
            with tf.GradientTape(persistent=True) as tape:
                new_probs, new_val = self.cnn_actor_critic([state_arr[batch], additional_info[batch]])
                masked_new_probs = ENVIRONMENT.mass_apply_mask(new_probs.numpy(), env_states[batch])

                new_log_probs_of_old_actions = []
                for index, val in enumerate(masked_new_probs):
                    new_log_probs_of_old_actions.append(-np.log(val[action_arr[batch][index]+1]))
                new_log_probs_of_old_actions = np.array(new_log_probs_of_old_actions)

                new_val = new_val.numpy()
                critic_value = tf.squeeze(new_val, 1)
                returns = advantage[batch] + values[batch]
                critic_loss = tf.keras.losses.MSE(critic_value, returns)

                prob_ratio = tf.math.exp(new_log_probs_of_old_actions - old_prob_arr)
                weighted_probs = advantage[batch] * prob_ratio
                clipped_probs = tf.clip_by_value(prob_ratio, 1-self.policy_clip, 1+self.policy_clip)
                weighted_clipped_probs = clipped_probs * advantage[batch]
                l_clip = tf.math.minimum(weighted_probs, weighted_clipped_probs)  # previously actor_loss

                entropy_term = -np.sum(np.multiply(new_log_probs_of_old_actions, np.log(new_log_probs_of_old_actions)))
                l_q_extension = self.c1*critic_loss.numpy() - self.c2*entropy_term
                l_q = -1*l_clip + l_q_extension
                actor_critic_cnn_loss = tf.math.reduce_mean(l_q)

            cnn_actor_critic_params = self.cnn_actor_critic.trainable_weights
            actor_critic_grads = tape.gradient([actor_critic_cnn_loss, critic_loss], cnn_actor_critic_params)
            self.cnn_actor_critic.optimizer.apply_gradients(zip(actor_critic_grads, self.cnn_actor_critic.trainable_variables))

    self.memory.clear_memory()
and it's the second-to-last line of learn that raises the error:
self.cnn_actor_critic.optimizer.apply_gradients(zip(actor_critic_grads, self.cnn_actor_critic.trainable_variables))
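In case it helps, the same error can be reproduced with a stripped-down version of the pattern I'm using (toy model, shapes, and loss, nothing from my actual environment):

import numpy as np
import tensorflow as tf

model = tf.keras.Sequential([tf.keras.layers.Input(shape=(3,)),
                             tf.keras.layers.Dense(1)])
optimizer = tf.keras.optimizers.Adam()

x = tf.random.normal((4, 3))
with tf.GradientTape() as tape:
    y = model(x)
    # Mirror the pattern above: the model output is pulled out as a NumPy
    # array before the loss is computed from it.
    loss = tf.reduce_mean(tf.square(y.numpy() - 1.0))

grads = tape.gradient(loss, model.trainable_weights)
# Raises: ValueError: No gradients provided for any variable: ...
optimizer.apply_gradients(zip(grads, model.trainable_weights))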