0

This is my code for training an agent on OpenAI Gym's Lunar Lander environment and plotting its learning curve.

def plot_learning_curve(x, scores, figure_file, window=100):
    """Plot the trailing running average of ``scores`` and save the figure.

    Args:
        x: Horizontal-axis values passed to ``plt.plot`` alongside the
            running average (so it needs one value per score).
        scores: Sequence of scores to average.
        figure_file: Destination path handed to ``plt.savefig``.
        window: How many of the most recent scores (current one included)
            are averaged at each point. Defaults to 100.

    Raises:
        ValueError: If ``window`` is smaller than 1.
    """
    if window < 1:
        raise ValueError('window must be at least 1')
    running_avg = np.zeros(len(scores))
    for i in range(len(running_avg)):
        # Start at i - window + 1 so the slice holds exactly `window` scores;
        # for the first points it simply covers every score seen so far.
        running_avg[i] = np.mean(scores[max(0, i - window + 1):(i + 1)])
    plt.plot(x, running_avg)
    plt.title('Running average of previous %d scores' % window)
    plt.savefig(figure_file)

# Local import: only needed to make sure the plot directory exists.
import os

env = gym.make('LunarLanderContinuous-v2')

# Hyperparameters are passed straight to the project's Agent class; their
# exact meaning is defined there (not visible from this script).
agent = Agent(
    alpha=0.0001,
    beta=0.001,
    input_dims=env.observation_space.shape,
    tau=0.001,
    batch_size=64,
    fc1_dims=400,
    fc2_dims=300,
    n_actions=env.action_space.shape[0],
)
n_games = 1000

filename = 'LunarLander_alpha_' + str(agent.alpha) + '_beta_' + str(agent.beta) + '_' + str(n_games) + '_games'
figure_file = 'plots/' + filename + '.png'

# Create the output directory up front: otherwise a missing 'plots/' folder
# would only surface as a savefig error after all games have been played.
os.makedirs(os.path.dirname(figure_file), exist_ok=True)

# Starting value for the best running average seen so far.
# NOTE(review): presumably reward_range[0] is the lowest possible reward, so
# the first average always beats it -- confirm for the gym version in use.
best_score = env.reward_range[0]
score_history = []

for i in range(n_games):
    # NOTE(review): assumes the older gym API where reset() returns only the
    # observation and step() returns a 4-tuple -- confirm installed version.
    observation = env.reset()
    done = False
    score = 0
    agent.noise.reset()

    while not done:
        action = agent.choose_action(observation)
        observation_, reward, done, info = env.step(action)
        # Store the transition, then let the agent update after every step.
        agent.remember(observation, action, reward, observation_, done)
        agent.learn()
        score += reward
        observation = observation_

    score_history.append(score)
    # Average over (at most) the last 100 finished episodes.
    avg_score = np.mean(score_history[-100:])

    # Save the models whenever the running average reaches a new best.
    if avg_score > best_score:
        best_score = avg_score
        agent.save_models()

    print('episode ', i, 'score %.1f' % score,'average score %.1f' % avg_score)

# 1-based episode numbers for the horizontal axis of the learning curve.
x = list(range(1, n_games + 1))
plot_learning_curve(x, score_history, figure_file)

It gives me a nice plot, but I'd like to either watch the lunar lander play live or at least save a video file of its animation at each stage of training.

How can I go about doing this?

Onur-Andros Ozbek
  • 2,998
  • 2
  • 29
  • 78

0 Answers0