Thanks for reading my question.
I was using keras to develop my reinforcement learning agent based on keres-rl. But I want to upgrade my agent so that I get some update from open ai base line code for better action exploration. But the code used tensorflow only. It is my first time to use tensorflow. I am so confused. I build keras deep learninng model using its "Model API". I have never concerned about inside of model. But the code I referenced was full of the code that kick in inside of deep learning model and give some change to its weight and get immediate layer output using tf.Session(). The framework is so flexible. Like below, using tf.Session() the tensor, which is recognized tensor and is not callable, can get result feeding feed_dict data. In keras, it is impossible as far as I know.
Once I allow using tf.Session(), my architecture will be complex and nobody wants to understand and use it except that I can adapt reference code more easily.
On the other side, if I don't allow that, I needs to break down my existing model and use tons of K.function to get middle layer's output or something that I can't get from keras model.
import numpy as np
from keras.layers import Dense, Input, BatchNormalization
from keras.models import Model
import tensorflow as tf
import keras.backend as K
import rl2.tf_util as U
def normalize(x, stats):
if stats is None:
return x
return (x - stats.mean) / (stats.std + 1e-8)
class RunningMeanStd(object):
# https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Parallel_algorithm
def __init__(self, my, epsilon=1e-2, shape=()):
self._sum = K.variable(value=np.zeros(shape), dtype=tf.float32, name=my+"_runningsum")
self._sumsq = K.variable(value=np.zeros(shape) + epsilon, dtype=tf.float32, name=my+"_runningsumsq")
self._count = K.variable(value=np.zeros(()) + epsilon, dtype=tf.float32, name=my+"_count")
self.mean = self._sum / self._count
self.std = K.sqrt(K.maximum((self._sumsq / self._count) - K.square(self.mean), epsilon))
newsum = K.variable(value=np.zeros(shape), dtype=tf.float32, name=my+'_sum')
newsumsq = K.variable(value=np.zeros(shape), dtype=tf.float32, name=my+'_var')
newcount = K.variable(value=np.zeros(()), dtype=tf.float32, name=my+'_count')
self.incfiltparams = K.function([newsum, newsumsq, newcount], [],
updates=[K.update_add(self._sum, newsum),
K.update(self._sumsq, newsumsq),
K.update(self._count, newcount)])
def update(self, x):
x = x.astype('float64')
n = int(np.prod(self.shape))
totalvec = np.zeros(n*2+1, 'float64')
addvec = np.concatenate([x.sum(axis=0).ravel(), np.square(x).sum(axis=0).ravel(), np.array([len(x)],dtype='float64')])
self.incfiltparams(totalvec[0:n].reshape(self.shape),
totalvec[n:2*n].reshape(self.shape),
totalvec[2*n])
i = Input(shape=(1,))
# h = BatchNormalization()(i)
h = Dense(4, activation='relu', kernel_initializer='he_uniform')(i)
h = Dense(10, activation='relu', kernel_initializer='he_uniform')(h)
o = Dense(1, activation='linear', kernel_initializer='he_uniform')(h)
model = Model(i, o)
obs_rms = RunningMeanStd(my='obs', shape=(1,))
normalized_obs0 = K.clip(normalize(i, obs_rms), 0, 100)
tf2 = model(normalized_obs0)
# print(model.predict(np.asarray([2,2,2,2,2]).reshape(5,)))
# print(tf(np.asarray([2,2,2,2,2]).reshape(5,)))
with tf.Session() as sess:
sess.run(tf.global_variables_initializer())
print(sess.run([tf2], feed_dict={i : U.adjust_shape(i, [np.asarray([2,]).reshape(1,)])}))