
I am using Keras with the TensorFlow backend to create a Deep Q-learning agent that plays Atari games on OpenAI Gym, but when I train the model my GPU utilization stays around 8 to 10 percent. I am new to this and can't figure out how to improve the GPU utilization. Can you please give me some tips? Here is the code:

import gym
import random
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
import matplotlib.animation as anim
import time

from keras.models import Sequential
from keras.layers import Conv2D, Dense, Flatten, Lambda
from keras.optimizers import RMSprop
from keras import backend as k
from skimage.color import rgb2gray
from skimage.transform import resize
from collections import deque

class DQNAgent() :

    def __init__(self, n_actions):
        self.learning_rate = 0.00025
        self.epsilon = 1.0
        self.epsilon_min = 0.1
        self.epsilon_decay = 0.0001
        self.gamma = 0.99
        self.n_actions = n_actions
        self.batch_size = 32
        self.model = self.create_model()
        self.memory = deque(maxlen=100000)

    def create_model(self) :
        model = Sequential()

        model.add(Lambda(lambda x : x/255.0, input_shape=(84, 84, 4)))
        model.add(Conv2D(filters=16, kernel_size=(8,8), strides=(4,4), activation='relu'))
        model.add(Conv2D(filters=32, kernel_size=(4,4), strides=(2,2), activation='relu'))
        model.add(Flatten())
        model.add(Dense(units=256, activation='relu'))
        model.add(Dense(units=self.n_actions))

        model.compile(optimizer=RMSprop(learning_rate=self.learning_rate, rho=0.95, epsilon=0.01), loss=huber_loss)

        return model

    def act(self, state) :
        if random.random() <= self.epsilon :
            return random.randint(0, self.n_actions - 1)

        return(np.argmax(self.model.predict(state)[0]))

    def remember(self, state, action, reward, next_state, dead) :
        self.memory.append((state, action, reward, next_state, dead))

    def replay(self) :
        mini_batch = random.sample(self.memory, self.batch_size)

        state = np.zeros((self.batch_size, 84, 84, 4))
        next_state = np.zeros_like(state)
        target = np.zeros((self.batch_size,))
        action, reward, dead = [], [], []

        for idx, val in enumerate(mini_batch) :
            state[idx] = val[0]
            action.append(val[1])
            reward.append(val[2])
            next_state[idx] = val[3]
            dead.append(val[4])

        future_q = self.model.predict(next_state, batch_size=self.batch_size)

        for i in range(self.batch_size) :
            if dead[i] :
                target[i] = -1
            else :
                target[i] = reward[i] + self.gamma*np.amax(future_q[i])

        action_one_hot = get_one_hot(action, self.n_actions)
        target_one_hot = action_one_hot * target[:, None]

        loss = self.model.fit(state, target_one_hot, batch_size=self.batch_size, epochs=1, verbose=0).history['loss'][0]

        return loss

    def preprocess(self, image) :
        return np.uint8(resize(rgb2gray(image), output_shape=(84, 84), mode='constant') * 255)

    def save_model(self) :
        self.model.save_weights('model.json')

    def load_model(self) :
        self.model.load_weights('model.json')

def get_one_hot(arr, num) :
        return np.eye(num)[np.array(arr).reshape(-1)]

def huber_loss(y, q_value):
    error = k.abs(y - q_value)
    quadratic_part = k.clip(error, 0.0, 1.0)
    linear_part = error - quadratic_part
    loss = k.mean(0.5 * k.square(quadratic_part) + linear_part)

    return loss

def train(resume=False) :
    env = gym.make('BreakoutDeterministic-v4')
    agent = DQNAgent(env.action_space.n)

    for i in range(1000) :

        state = env.reset()

        if resume :
            agent.load_model()

        # Warm up for 30 frames (action 1 is FIRE in Breakout, not a true no-op)
        for _ in range(30) :
            state, _, _, _ = env.step(1)

        state = agent.preprocess(state)
        state = np.stack((state, state, state, state), axis = 2)
        state = np.reshape(state, (1, 84, 84, 4))

        done, dead = False, False
        score, loss, lives = 0, 0, 5

        while not done :
            env.render()

            # Select action based on the state
            action = agent.act(state)
            if len(agent.memory) > 5000 and agent.epsilon > agent.epsilon_min :
                agent.epsilon -= agent.epsilon_decay

            # Take a step in the environment
            next_state, reward, done, info = env.step(action)
            score += reward


            if lives > info['ale.lives'] :
                dead = True
                lives = info['ale.lives']

            next_state = agent.preprocess(next_state)
            next_state = np.reshape(next_state, (1, 84, 84, 1))
            next_state = np.append(next_state, state[:,:,:,:3], axis = 3)

            # Store into memory
            agent.remember(state, action, reward, next_state, dead)

            # Start training once enough transitions are stored
            if len(agent.memory) > 5000 :
                loss += agent.replay()

            if dead :
                dead = False
            else :
                state = next_state

            if done :
                print("Episode : {0}, score : {1}, loss : {2}, memory size : {3}".format(i, score, loss, len(agent.memory)))

    env.close()
    agent.save_model()

def test() :
    env = gym.make('BreakoutDeterministic-v4')
    agent = DQNAgent(env.action_space.n)
    agent.load_model()

    for i in range(100) :

        state = env.reset()

        for _ in range(30) :
            state, _, _, _ = env.step(0)

        state = agent.preprocess(state)
        state = np.stack((state, state, state, state), axis = 2)
        state = np.reshape(state, (1, 84, 84, 4))

        done, dead = False, False
        score, lives = 0, 5

        while not done :
            env.render()

            action = agent.act(state)

            next_state, reward, done, info = env.step(action)
            score += reward

            if lives > info['ale.lives'] :
                dead = True
                lives = info['ale.lives']

            next_state = agent.preprocess(next_state)
            next_state = np.reshape(next_state, (1, 84, 84, 1))
            next_state = np.append(next_state, state[:,:,:,:3], axis = 3)

            if dead :
                dead = False
            else :
                state = next_state

            if done :
                print("Episode : {0}, score : {1}".format(i, score))

if __name__ == "__main__":
    train(False)
    #test()

As soon as model.fit is called, GPU usage drops drastically, so I guess the problem lies there?
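
To check whether the per-call overhead of model.fit (epoch setup, callbacks, history bookkeeping) is what stalls the GPU rather than the network itself, a small standalone timing sketch like the one below can help. It builds the same architecture on dummy data (4 actions assumed, as in Breakout) and compares model.fit against train_on_batch, which runs a single gradient step without the per-fit setup. The numbers vary by machine, so treat it as a rough diagnostic only:

import time
import numpy as np
from keras.models import Sequential
from keras.layers import Conv2D, Dense, Flatten, Lambda

# Same architecture as in the question; 4 actions assumed
model = Sequential([
    Lambda(lambda x: x / 255.0, input_shape=(84, 84, 4)),
    Conv2D(16, (8, 8), strides=(4, 4), activation='relu'),
    Conv2D(32, (4, 4), strides=(2, 2), activation='relu'),
    Flatten(),
    Dense(256, activation='relu'),
    Dense(4),
])
model.compile(optimizer='rmsprop', loss='mse')

x = np.random.randint(0, 256, size=(32, 84, 84, 4)).astype(np.float32)
y = np.random.rand(32, 4).astype(np.float32)

model.train_on_batch(x, y)  # warm-up so graph building is not timed

start = time.perf_counter()
for _ in range(100):
    model.fit(x, y, batch_size=32, epochs=1, verbose=0)
print('100 x fit:            ', time.perf_counter() - start)

start = time.perf_counter()
for _ in range(100):
    model.train_on_batch(x, y)
print('100 x train_on_batch: ', time.perf_counter() - start)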

I tried increasing my batch size, but that only got me to 9 to 11 percent GPU usage.
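
Since most of each step here is Python-side work (the per-sample loops in replay, a model.predict call per act, plus env.render every frame), a bigger batch mostly just waits behind that. One thing that could be tried is a rewrite of replay() along these lines; it is only a sketch, but it keeps the same targets (-1 on death, otherwise reward + gamma * max Q of the next state) while using predict_on_batch / train_on_batch and vectorized numpy instead of the per-sample loops:

    # Sketch of a drop-in replacement for DQNAgent.replay
    def replay(self):
        mini_batch = random.sample(self.memory, self.batch_size)

        state = np.zeros((self.batch_size, 84, 84, 4))
        next_state = np.zeros_like(state)
        action, reward, dead = [], [], []

        for idx, (s, a, r, ns, d) in enumerate(mini_batch):
            state[idx] = s
            action.append(a)
            reward.append(r)
            next_state[idx] = ns
            dead.append(d)

        # predict_on_batch skips the extra bookkeeping that model.predict does per call
        future_q = self.model.predict_on_batch(next_state)

        # Same targets as before, computed in one vectorized step
        target = np.where(dead, -1.0,
                          np.array(reward) + self.gamma * np.amax(future_q, axis=1))
        target_one_hot = get_one_hot(action, self.n_actions) * target[:, None]

        # train_on_batch runs one gradient step without the per-fit setup cost
        return self.model.train_on_batch(state, target_one_hot)

Dropping env.render() from the training loop (and rendering only in test) also keeps the loop from being blocked on drawing a frame every step.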

I am on a laptop with:

Nvidia GTX 1050 Ti

8 GB RAM

i7-8750H processor at 2.20 GHz

  • Firstly, can you tell us your `CUDA` usage? You can view it in Task Manager by clicking the label of any graph and selecting `CUDA`. Also, I want to ask why you are worried about the GPU usage. If you are training a DQN, then you should be worried about the performance of the agent rather than the GPU.... – neel g Apr 27 '20 at 13:21
  • CUDA usage stays constant at 11%. Regarding my agent's performance, it's currently training and, about 5500 epochs in, the average score on Breakout is currently 2.5. Is it performing badly? – Rushik Apr 28 '20 at 04:00
  • Are you sure the score is `2.5`? That is not the expected score for a DQN on Breakout. The problem is not with your GPU but rather your code. Look up 'how to improve DQN' on Google. There is no problem with your hardware setup.... – neel g Apr 28 '20 at 05:25
  • Sorry, I meant the average score. – Rushik Apr 28 '20 at 05:25
  • Yes, that is extremely poor. I was expecting somewhere along the lines of `30-50`. You should optimize your code first... – neel g Apr 28 '20 at 06:52
  • I actually just changed my code to include a target network and started training. Other than that, I don't know what to improve, but I will look for something. Thanks! – Rushik Apr 28 '20 at 06:55
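
For reference, a minimal sketch of the target-network change mentioned in the last comment, building on the DQNAgent class from the question. The class name and the target_q helper are made up for illustration, and total_steps is a step counter that the posted train() does not keep yet:

class DQNAgentWithTarget(DQNAgent):

    def __init__(self, n_actions):
        super().__init__(n_actions)
        # Second network with the same architecture; kept frozen between syncs
        self.target_model = self.create_model()
        self.update_target_model()

    def update_target_model(self):
        # Copy the online network's weights into the target network
        self.target_model.set_weights(self.model.get_weights())

    def target_q(self, next_state):
        # Use this inside replay() instead of self.model.predict(next_state),
        # so the bootstrap targets come from the slowly-moving copy
        return self.target_model.predict_on_batch(next_state)

# In the training loop, refresh the copy every few thousand environment steps, e.g.:
#     if total_steps % 10000 == 0:
#         agent.update_target_model()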
