I'm trying to convert my TensorFlow code to TensorFlow eager execution. The problem is that in eager mode the forward pass predicts almost the same action probabilities for different input values, while the same code with graph execution works fine. I've only changed the network; the agent is the same one I used with graph-mode TensorFlow. What could be the problem with the network? The forward pass is in the function get_probs().
Another issue is that the eager network is very slow; I think graph execution is 2-3 times faster.
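For reference, here is a minimal sketch of the kind of graph tracing I understand should recover most of the speed, assuming TF 1.x where tf.contrib.eager.defun is available; the tiny Dense model is only a stand-in for the real network:

    import numpy as np
    import tensorflow as tf

    tf.enable_eager_execution()

    # Stand-in model; in my code this would be self.model.
    model = tf.keras.Sequential([tf.keras.layers.Dense(3, input_shape=(16,))])

    # defun traces the Python function into a graph on the first call and
    # reuses that graph afterwards, avoiding per-op eager dispatch overhead.
    fast_forward = tf.contrib.eager.defun(lambda x: model(x))

    x = np.random.rand(1, 16).astype(np.float32)
    print(tf.nn.softmax(fast_forward(x)).numpy())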
Example probs for one episode in eager mode:
...
[0.31471518 0.33622807 0.34905672]
[0.31472355 0.3363353 0.34894115]
[0.31482834 0.33600125 0.34917045]
[0.31461707 0.33643782 0.34894508]
[0.31466153 0.33620775 0.34913075]
[0.31461093 0.33637658 0.3490125 ]
[0.31452385 0.33623937 0.34923682]
[0.31438416 0.33645296 0.3491629 ]
[0.31471425 0.3363982 0.34888753]
[0.314866 0.33610862 0.34902537]
[0.31489033 0.33622313 0.34888652]
...
Example probs for one episode with the TensorFlow graph:
...
[0.25704077 0.46056205 0.28239718]
[0.20610097 0.49288744 0.30101162]
[0.24638997 0.5338215 0.2197885 ]
[0.22581507 0.51206875 0.2621162 ]
[0.19064051 0.5398092 0.26955026]
[0.24399564 0.4424694 0.313535 ]
[0.25321653 0.48051655 0.26626688]
[0.2241595 0.43447506 0.3413655 ]
[0.20665398 0.5128011 0.28054494]
[0.2943201 0.39530927 0.3103706 ]
...
Network
import numpy as np
import tensorflow as tf
from tensorflow.keras.layers import Conv2D, Dense, Flatten, InputLayer

tf.enable_eager_execution()
print(tf.executing_eagerly())
class PGEagerAtariNetwork:
    def __init__(self, state_space, action_space, lr):
        self.state_space = state_space
        self.action_space = action_space
        self.model = tf.keras.Sequential()
        self.model.add(InputLayer(input_shape=(84, 84, 4)))
        # Conv
        self.model.add(Conv2D(filters=32, kernel_size=[8, 8], strides=[4, 4], activation='relu', name='conv1'))
        self.model.add(Conv2D(filters=64, kernel_size=[4, 4], strides=[2, 2], activation='relu', name='conv2'))
        self.model.add(Conv2D(filters=128, kernel_size=[4, 4], strides=[2, 2], activation='relu', name='conv3'))
        # Flatten
        self.model.add(Flatten(name='flatten'))
        # Fully connected
        self.model.add(Dense(units=512, activation='relu', name='fc1'))
        # Logits
        self.model.add(Dense(units=self.action_space, activation=None, name='logits'))
        self.model.summary()
        # Optimizer
        self.optimizer = tf.train.AdamOptimizer(learning_rate=lr)
    def get_probs(self, s):
        # Add a batch dimension, run the forward pass, and squeeze it back out.
        s = s[np.newaxis, :]
        logits = self.model(s)
        probs = tf.nn.softmax(logits).numpy().squeeze()
        return probs
    def update_policy(self, s, r, a):
        # Record the forward pass on the tape so gradients can be computed.
        with tf.GradientTape() as tape:
            loss = self.calc_loss(s, r, a)
        grads = tape.gradient(loss, self.model.trainable_variables)
        self.optimizer.apply_gradients(zip(grads, self.model.trainable_variables),
                                       global_step=tf.train.get_or_create_global_step())
    def calc_loss(self, s, r, a):
        logits = self.model(s)
        # REINFORCE loss: cross-entropy of the taken actions, weighted by
        # the gradient-stopped returns.
        policy_loss = tf.nn.softmax_cross_entropy_with_logits_v2(labels=a, logits=logits)
        loss = tf.reduce_mean(policy_loss * tf.stop_gradient(r))
        return loss
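To make the symptom easy to reproduce, here is a hypothetical sanity check (the random arrays stand in for real preprocessed frames; the shapes and hyperparameters are assumptions): if the forward pass is input-sensitive, the two printed probability vectors should differ noticeably, but in eager mode they come out nearly identical.

    import numpy as np

    # Hypothetical sanity check: two random "states" stand in for real
    # preprocessed Atari frames (84x84, 4 stacked frames).
    net = PGEagerAtariNetwork(state_space=(84, 84, 4), action_space=3, lr=1e-4)
    s1 = np.random.rand(84, 84, 4).astype(np.float32)
    s2 = np.random.rand(84, 84, 4).astype(np.float32)

    # If the network is input-sensitive, these two vectors should differ.
    print(net.get_probs(s1))
    print(net.get_probs(s2))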