I would like to create an AI for the Chrome-No-Internet-Dino-Game. Therefore I adapted this Github-Repository to fit my needs. I used the following formula to calculate the new Q:
Source: https://en.wikipedia.org/wiki/Q-learning
My problem now is that even after ~ 2.000.000 iterations my game score is not increasing.
You can find the game file here: https://pastebin.com/XrwQ0suJ
QLearning.py:
import pickle
import Game_headless
import Game
import numpy as np
from collections import defaultdict
rewardAlive = 1
rewardKill = -10000
alpha = 0.2 # Learningrate
gamma = 0.9 # Discount
Q = defaultdict(lambda: [0, 0, 0]) # 0 = Jump / 1 = Duck / 2 = Do Nothing
oldState = None
oldAction = None
gameCounter = 0
gameScores = []
def paramsToState(params):
cactus1X = round(params["cactus1X"] / 10) * 10
cactus2X = round(params["cactus2X"] / 10) * 10
cactus1Height = params["cactus1Height"]
cactus2Height = params["cactus2Height"]
pteraX = round(params["pteraX"] / 10) * 10
pteraY = params["pteraY"]
playerY = round(params["playerY"] / 10) * 10
gamespeed = params["gamespeed"]
return str(cactus1X) + "_" + str(cactus2X) + "_" + str(cactus1Height) + "_" + \
str(cactus2Height) + "_" + str(pteraX) + "_" + str(pteraY) + "_" + \
str(playerY) + "_" + str(gamespeed)
def shouldEmulateKeyPress(params): # 0 = Jump / 1 = Duck / 2 = Do Nothing
global oldState
global oldAction
state = paramsToState(params)
oldState = state
estReward = Q[state]
action = estReward.index(max(estReward))
if oldAction is None:
oldAction = action
return action
# Previous action was successful
# -> Update Q
prevReward = Q[oldState]
prevReward[oldAction] = (1 - alpha) * prevReward[oldAction] + \
alpha * (rewardAlive + gamma * max(estReward))
Q[oldState] = prevReward
oldAction = action
return action
def onGameOver(score):
# Previous action was NOT successful
# -> Update Q
global oldState
global oldAction
global gameCounter
global gameScores
gameScores.append(score)
if gameCounter % 10000 == 0:
print(f"{gameCounter} : {np.mean(gameScores[-100:])}")
prevReward = Q[oldState]
prevReward[oldAction] = (1 - alpha) * prevReward[oldAction] + \
alpha * rewardKill
Q[oldState] = prevReward
oldState = None
oldAction = None
if gameCounter % 10000 == 0:
with open("Q\\" + str(gameCounter) + ".pickle", "wb") as file:
pickle.dump(dict(Q), file)
gameCounter += 1
Game_headless.main(shouldEmulateKeyPress, onGameOver)
On every frame the gameplay()
function from Game_headless.py
calls shouldEmulateKeyPress()
. Said function then returns 0 for Jump, 1 for duck and 2 for nothing.
I tried adjusting the constants, but that didn't show any effect.
If you any questions, please don't hesitate to ask me!
Thank you in advance!