I am using Keras with the TensorFlow backend to create a Deep Q-learning agent that plays Atari games on OpenAI Gym, but when I train the model my GPU utilization stays around 8 to 10 percent. I am new to this and cannot figure out how to improve the GPU utilization. Can you give me some tips? Here is the code:
import gym
import random
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
import matplotlib.animation as anim
import time
from keras.models import Sequential
from keras.layers import Conv2D, Dense, Flatten, Lambda
from keras.optimizers import RMSprop
from keras import backend as k
from skimage.color import rgb2gray
from skimage.transform import resize
from collections import deque
class DQNAgent() :
    def __init__(self, n_actions):
        self.learning_rate = 0.00025
        self.epsilon = 1.0
        self.epsilon_min = 0.1
        self.epsilon_decay = 0.0001
        self.gamma = 0.99
        self.n_actions = n_actions
        self.batch_size = 32
        self.model = self.create_model()
        self.memory = deque(maxlen=100000)

    def create_model(self) :
        model = Sequential()
        model.add(Lambda(lambda x : x/255.0, input_shape=(84, 84, 4)))
        model.add(Conv2D(filters=16, kernel_size=(8,8), strides=(4,4), activation='relu'))
        model.add(Conv2D(filters=32, kernel_size=(4,4), strides=(2,2), activation='relu'))
        model.add(Flatten())
        model.add(Dense(units=256, activation='relu'))
        model.add(Dense(units=self.n_actions))
        model.compile(optimizer=RMSprop(learning_rate=self.learning_rate, rho=0.95, epsilon=0.01), loss=huber_loss)
        return model

    def act(self, state) :
        if random.random() <= self.epsilon :
            return random.randint(0, self.n_actions - 1)
        return np.argmax(self.model.predict(state)[0])

    def remember(self, state, action, reward, next_state, dead) :
        self.memory.append((state, action, reward, next_state, dead))

    def replay(self) :
        mini_batch = random.sample(self.memory, self.batch_size)
        state = np.zeros((self.batch_size, 84, 84, 4))
        next_state = np.zeros_like(state)
        target = np.zeros((self.batch_size,))
        action, reward, dead = [], [], []
        for idx, val in enumerate(mini_batch) :
            state[idx] = val[0]
            action.append(val[1])
            reward.append(val[2])
            next_state[idx] = val[3]
            dead.append(val[4])
        future_q = self.model.predict(next_state, batch_size=self.batch_size)
        for i in range(self.batch_size) :
            if dead[i] :
                target[i] = -1
            else :
                target[i] = reward[i] + self.gamma*np.amax(future_q[i])
        action_one_hot = get_one_hot(action, self.n_actions)
        target_one_hot = action_one_hot * target[:, None]
        loss = self.model.fit(state, target_one_hot, batch_size=self.batch_size, epochs=1, verbose=0).history['loss'][0]
        return loss

    def preprocess(self, image) :
        return np.uint8(resize(rgb2gray(image), output_shape=(84, 84), mode='constant') * 255)

    def save_model(self) :
        self.model.save_weights('model.json')

    def load_model(self) :
        self.model.load_weights('model.json')

def get_one_hot(arr, num) :
    return np.eye(num)[np.array(arr).reshape(-1)]

def huber_loss(y, q_value):
    error = k.abs(y - q_value)
    quadratic_part = k.clip(error, 0.0, 1.0)
    linear_part = error - quadratic_part
    loss = k.mean(0.5 * k.square(quadratic_part) + linear_part)
    return loss
def train(resume=False) :
    env = gym.make('BreakoutDeterministic-v4')
    agent = DQNAgent(env.action_space.n)
    for i in range(1000) :
        state = env.reset()
        if resume :
            agent.load_model()
        # Do no operation for 30 iterations
        for _ in range(30) :
            state, _, _, _ = env.step(1)
        state = agent.preprocess(state)
        state = np.stack((state, state, state, state), axis = 2)
        state = np.reshape(state, (1, 84, 84, 4))
        done, dead = False, False
        score, loss, lives = 0, 0, 5
        while not done :
            env.render()
            # Select action based on the state
            action = agent.act(state)
            if len(agent.memory) > 5000 and agent.epsilon > agent.epsilon_min :
                agent.epsilon -= agent.epsilon_decay
            # Take a step in the environment
            next_state, reward, done, info = env.step(action)
            score += reward
            if lives > info['ale.lives'] :
                dead = True
                lives = info['ale.lives']
            next_state = agent.preprocess(next_state)
            next_state = np.reshape(next_state, (1, 84, 84, 1))
            next_state = np.append(next_state, state[:,:,:,:3], axis = 3)
            # Store into memory
            agent.remember(state, action, reward, next_state, dead)
            # if enough memory size start training
            if len(agent.memory) > 5000 :
                loss += agent.replay()
            if dead :
                dead = False
            else :
                state = next_state
            if done :
                print("Episode : {0}, score : {1}, loss : {2}, memory size : {3}".format(i, score, loss, len(agent.memory)))
    env.close()
    agent.save_model()

def test() :
    env = gym.make('BreakoutDeterministic-v4')
    agent = DQNAgent(env.action_space.n)
    agent.load_model()
    for i in range(100) :
        state = env.reset()
        for _ in range(30) :
            state, _, _, _ = env.step(0)
        state = agent.preprocess(state)
        state = np.stack((state, state, state, state), axis = 2)
        state = np.reshape(state, (1, 84, 84, 4))
        done, dead = False, False
        score, lives = 0, 5
        while not done :
            env.render()
            action = agent.act(state)
            next_state, reward, done, info = env.step(action)
            score += reward
            if lives > info['ale.lives'] :
                dead = True
                lives = info['ale.lives']
            next_state = agent.preprocess(next_state)
            next_state = np.reshape(next_state, (1, 84, 84, 1))
            next_state = np.append(next_state, state[:,:,:,:3], axis = 3)
            if dead :
                dead = False
            else :
                state = next_state
            if done :
                print("Episode : {0}, score : {1}".format(i, score))

if __name__ == "__main__":
    train(False)
    #test()
As soon as model.fit is called, GPU usage drops drastically, so I guess the problem lies there?
I tried increasing the batch size, but that only got me to 9 to 11 percent GPU usage.
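To narrow down where each training iteration actually spends its time, something like the small timing helper below could be wrapped around the calls inside the while loop. This is only an illustrative sketch (PhaseTimer, measure and report are names I made up for this example, not part of the code above):

import time
from collections import defaultdict

class PhaseTimer:
    # Accumulates wall-clock time per labelled phase of the training loop,
    # so the share of time spent in env.step vs. act vs. replay/fit can be compared.
    def __init__(self):
        self.totals = defaultdict(float)

    def measure(self, label):
        timer = self
        class _Phase:
            def __enter__(self):
                self.start = time.perf_counter()
            def __exit__(self, exc_type, exc_value, traceback):
                timer.totals[label] += time.perf_counter() - self.start
        return _Phase()

    def report(self):
        total = sum(self.totals.values()) or 1.0
        for label, seconds in sorted(self.totals.items(), key=lambda kv: -kv[1]):
            print("{0:<12} {1:8.3f}s ({2:5.1f}%)".format(label, seconds, 100.0 * seconds / total))

Inside the loop it would be used roughly like this, with timer.report() called at the end of an episode:

timer = PhaseTimer()
# inside the while loop:
with timer.measure('act'):
    action = agent.act(state)
with timer.measure('env.step'):
    next_state, reward, done, info = env.step(action)
if len(agent.memory) > 5000:
    with timer.measure('replay/fit'):
        loss += agent.replay()
# after the episode:
timer.report()

If most of the time per iteration turns out to be spent outside replay()/fit(), that would explain why the GPU stays mostly idle regardless of batch size.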
I am on a laptop with:
Nvidia GTX 1050 Ti
8 GB RAM
i7-8750H processor at 2.20 GHz