1
import tensorflow as tf
import keras
import numpy as np
import gym
import random
from keras.layers import *

model = keras.models.Sequential()

model.add(Dense(12,activation = 'tanh',input_shape = (4,)))
model.add(Dense(2,activation = 'linear')) 

model.compile(optimizer = 'adam',loss = 'MSE',metrics = ['accuracy'])

env = gym.make("CartPole-v1")

def preprocessing(state):
    return np.reshape(state,(1,4))

replay_mem = []
replay_size = 32
reward_list = []
local_mean = list()
for episode in range(2000):

    done = False
    state = env.reset()
    count = 0
    reward_tot = 0
    model.save("cartpole-dqn.h5")

    while not done:
        count += 1
        e = (1/(episode/200+1))
        
        
        #epslion greedy search
        Q = model.predict(preprocessing(state))
        if e>np.random.rand(1):
            action = env.action_space.sample()
        else:
            action = np.argmax(Q)
            
            
        #take_action and set reward
        state_next, reward, done, info = env.step(action)
        if done:
            reward = - 100
            
        
        #replay_mem
        replay_mem.append([state,action,reward,state_next,done])
        
        if len(replay_mem)>2048:
            del replay_mem[0]
            
            
        state = state_next
        reward_tot += reward
        state = state_next
        
        Q_list = []
        state_list = []
        
        
        #set this_replay_Size
        if len(replay_mem)<replay_size:
            this_replay_size = len(replay_mem)
        else:
            this_replay_size = replay_size
            
            
        #sample random batch from replay_memory
        for sample in random.sample(replay_mem,this_replay_size):
            state_m,action_m,reward_m,state_next_m,done_m = sample
            if done:
                Q[0,action] = reward_m
            else:
                Q_new = model.predict(preprocessing(state_next_m))
                Q[0,action] = reward_m + 0.97*np.max(Q_new)
            Q_list.append(Q)
            state_list.append(state_m)
              
        #convert to nupmy array and train
        Q_list = np.array(Q_list)        
        state_list = np.array(state_list)
        
        hist = model.fit(state_list,Q_list,epochs = 5,verbose = 0)
        #print("Done :",done,"     Reward :",reward,"     Reward_total :",reward_tot)
    
    local_mean.append(reward_tot)
    reward_list.append(reward_tot)
    if episode%10 == 0:
        print("Episode :",episode+1,"     Reward_total :", reward_tot,"     Reward_local_mean :",np.mean(local_mean))
        local_mean = list()
        
print("*******End Learning")

this is my full code of my model i implement dqn algorithm
what's wrong with my code? i've train this model abbout 1000episode but there no progress

what should i change? should i train more episode? or is there anything wrong with my implementation? i've been working on this project soo long help me please

0 Answers0