I want to pre-train my reinforcement learning model in a gym environment, and then deploy it to the real environment where it should keep learning online.
I am using TF, Keras-RL, and gym for the initial training; the code is below. What is the right way to manage that? I got lost googling it.
My guess is that in the real environment I should have two agents: one for prediction and another for further training. The training agent would learn from state-action samples collected at runtime, and the newly trained model would then be merged into the predicting model. If that assumption is correct, how can it be implemented? A rough sketch of what I mean is below.
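This is only an illustration of my guess, not working code: serve_action, sync_weights, and replay_buffer are names I made up, and it reuses build_model, states, and actions from the training code further down.

import numpy as np

# two copies of the same network: a frozen "predictor" serves actions in
# production while a "trainer" learns from samples collected at runtime
predictor = build_model(states, actions)
trainer = build_model(states, actions)
trainer.set_weights(predictor.get_weights())

# filled at runtime with (state, action, reward, next_state, done) tuples
replay_buffer = []

def serve_action(state):
    # greedy action from the frozen predictor; the model expects a
    # (batch, window, *obs_shape) input, hence the two added axes
    q_values = predictor.predict(state[np.newaxis, np.newaxis, ...], verbose=0)
    return int(np.argmax(q_values[0]))

def sync_weights():
    # the "merge" step: replace the serving weights with freshly trained ones
    predictor.set_weights(trainer.get_weights())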
import numpy as np
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten
from tensorflow.keras.optimizers import Adam

from rl.agents import DQNAgent
from rl.callbacks import ModelIntervalCheckpoint, FileLogger
from rl.policy import LinearAnnealedPolicy, EpsGreedyQPolicy
from rl.memory import SequentialMemory

# custom gym environment
env = FooEnv()
env.seed(0)

states = env.observation_space.shape
actions = env.action_space.n

def build_model(states, actions):
    # simple MLP mapping a flattened observation to one Q-value per action
    model = Sequential()
    model.add(Flatten(input_shape=(1,) + states))
    model.add(Dense(24, activation='relu'))
    model.add(Dense(24, activation='relu'))
    model.add(Dense(actions, activation='linear'))
    return model

model = build_model(states, actions)
model.summary()

def build_agent(model, actions):
    # epsilon-greedy exploration, annealed from 1.0 down to 0.1 over 500 steps
    policy = LinearAnnealedPolicy(EpsGreedyQPolicy(), attr='eps', value_max=1.,
                                  value_min=0.1, value_test=0.05, nb_steps=500)
    memory = SequentialMemory(limit=10000, window_length=1)
    dqn = DQNAgent(model=model, memory=memory, policy=policy, enable_double_dqn=True,
                   nb_actions=actions, gamma=.98, nb_steps_warmup=100,
                   target_model_update=1e-2)
    return dqn

def build_callbacks(env_name):
    checkpoint_weights_filename = 'weights/dqn_' + env_name + '_weights_{step}.h5f'
    log_filename = 'weights/dqn_{}_log.json'.format(env_name)
    callbacks = [ModelIntervalCheckpoint(checkpoint_weights_filename, interval=1000)]
    callbacks += [FileLogger(log_filename, interval=100)]
    return callbacks

callbacks = build_callbacks('FooEnv')
dqn = build_agent(model, actions)
dqn.compile(Adam(learning_rate=1e-3), metrics=['mae'])  # 'lr' is deprecated in recent TF
dqn.fit(env, nb_steps=30000, log_interval=1000, nb_max_episode_steps=50,
        visualize=False, verbose=1, callbacks=callbacks)
scores = dqn.test(env, nb_episodes=1, visualize=True)
dqn.save_weights('weights/saved_weights', overwrite=True)
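Alternatively, if a single agent can simply keep training, I imagine reloading the saved weights and calling fit against a gym-compatible wrapper of the real environment. RealEnv below is a hypothetical placeholder I wrote to illustrate the idea; the sampled observations and constant reward are stubs for the real I/O:

import gym

class RealEnv(gym.Env):
    # hypothetical wrapper: step()/reset() would talk to the real system
    def __init__(self):
        self.observation_space = env.observation_space  # must match FooEnv
        self.action_space = env.action_space

    def reset(self):
        # placeholder: would read the initial observation from the real system
        return self.observation_space.sample()

    def step(self, action):
        # placeholder: would send `action` to the real system and read back
        obs = self.observation_space.sample()
        reward, done, info = 0.0, False, {}
        return obs, reward, done, info

real_env = RealEnv()
dqn.load_weights('weights/saved_weights')  # restore the pre-trained weights
# presumably exploration should stay low here (small epsilon), then the same
# training loop just continues against the real environment
dqn.fit(real_env, nb_steps=5000, log_interval=500, verbose=1)

Is this the right direction, or is the two-agent split above actually necessary?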