I'm trying to program a neural network to play noughts and crosses (also known as tic-tac-toe). The program works well enough to play against, and the loss function decreases when I train it, but only up to a point, after which it plateaus. I have played against it and it is still not much better than a random bot.

I have already tried adjusting the learning rate and the size of the hidden layer. I have also previously tried training it on my games against it, as well as training it only on the moves of the victorious side (interestingly, this minimised the loss function better than my current version, but the resulting network only ever tried to attack and had no idea it needed to block me from winning). In addition to this, I have tried a schedule that decreases the learning rate by 5 or 10% every 100 self-play games, roughly as sketched below.
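(To be clear, by the decaying learning rate I mean something like the following; the names here are illustrative rather than the ones in my actual code.)

learning_rate = 0.1                      # illustrative starting value
decay = 0.95                             # i.e. a 5% reduction each time
for game_number in range(10000):
    play_one_self_play_game()            # placeholder for the self-play/training loop in main() below
    if (game_number + 1) % 100 == 0:
        learning_rate *= decay           # shrink the step size every 100 games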

I have had a look online but haven't found any Python neural networks for tic-tac-toe that I could compare mine with in order to debug.

import random

def bot_go(player_to_move, game_over, board):
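    # asks the network for its highest-rated move; if that square is already taken, loops round and takes the next-best one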
    played = False
    iteration = 0
    while played is False:
        move, input_layer, hidden_layer, output_layer = neural_net_move(iteration, board)
        if board[int(move[0])][int(move[1])] == "-":
            played = True
            board[int(move[0])][int(move[1])] = player_to_move
            if check_for_win(player_to_move, board) is True:
                game_over = True
            elif check_for_draw(board) is True:
                game_over = True
            if player_to_move == "X":
                player_to_move = "O"
            else:
                player_to_move = "X"
        iteration += 1
    return game_over, player_to_move, move, input_layer, hidden_layer, output_layer

def neural_net_move(iteration, board):
    neural_network_input = {}
    neural_network_hidden_layer = {}
    neural_network_output = []
    layer_1_weights, layer_2_weights, bias = get_neural_network()

    # initialises the input layer
    for i in range(9):
        if board[i // 3][i % 3] == "X":
            neural_network_input[i] = 0
        elif board[i // 3][i % 3] == "O":
            neural_network_input[i] = 1
        else:
            neural_network_input[i] = 0.5

    # calculates the hidden layer neuron values
    for i in range(Global_variables.hidden_layer_size):
        net_total = 0
        for j in range(9):
            net_total += neural_network_input[j]*layer_1_weights[str(j) + str(i)]
        neural_network_hidden_layer[i] = (1/(1 + 2.718**(-net_total)))

    # calculates neural network output
    for i in range(9):
        net_total = 0
        for j in range(Global_variables.hidden_layer_size):
            net_total += neural_network_hidden_layer[j] * layer_2_weights[str(j) + str(i)]
        net_total += bias * layer_2_weights[str(Global_variables.hidden_layer_size) + str(i)]
        neural_network_output.append(1 / (1 + (2.718 ** (-net_total))))

    # finds output value by ordering the outputs in terms of size
    order_of_size = [0]
    for i in range(1, len(neural_network_output)):
        inserted = False
        for j in range(len(order_of_size)):
            if neural_network_output[i] > neural_network_output[order_of_size[j]] and inserted is False:
                order_of_size.insert(j, i)
                inserted = True
            elif j == len(order_of_size) - 1 and inserted is False:
                order_of_size.append(i)
    move = [order_of_size[iteration] // 3, order_of_size[iteration] % 3]
    return move, neural_network_input, neural_network_hidden_layer, neural_network_output


def train_neural_network(input_layer, hidden_layer, output_layer, actual_move):
    layer_1_weights, layer_2_weights, bias = get_neural_network()
    new_l1_weights = {}
    new_l2_weights = {}

    # calculates total error
    total_error = 0
    for i in range(len(output_layer)):
        if actual_move[0] * 3 + actual_move[1] == i:
            total_error += ((1 - output_layer[i])**2)/2
        else:
            total_error += 0.5*((output_layer[i])**2)

    # adjusts second layer weights
    for i in range((hidden_layer_size + 1)*9):
        if actual_move[0] * 3 + actual_move[1] == i % 9:
            d_error_by_d_output_node = output_layer[i % 9] - 1
        else:
            d_error_by_d_output_node = output_layer[i % 9]
        d_output_node_by_d_node_net_value = output_layer[i % 9]*(1 - output_layer[i % 9])
        if i // 9 != hidden_layer_size:
            d_node_net_value_by_d_weight = hidden_layer[i // 9]
        else:
            d_node_net_value_by_d_weight = bias
        d_error_by_d_weight = d_error_by_d_output_node*d_output_node_by_d_node_net_value*d_node_net_value_by_d_weight
        new_l2_weights[str(i // 9) + str(i % 9)] = \
            layer_2_weights[str(i // 9) + str(i % 9)] - learning_rate*d_error_by_d_weight

    # adjusts bias
    d_error_by_d_bias = 0
    for i in range(9):
        d_node_net_value_by_d_bias = layer_2_weights[str(hidden_layer_size) + str(i)]
        d_output_node_by_d_node_net_value = output_layer[i]*(1 - output_layer[i])
        if actual_move[0] * 3 + actual_move[1] == i:
            d_error_by_d_output_node = output_layer[i] - 1
        else:
            d_error_by_d_output_node = output_layer[i]
        d_error_by_d_bias += d_node_net_value_by_d_bias * d_output_node_by_d_node_net_value * d_error_by_d_output_node
    bias = bias - learning_rate * d_error_by_d_bias

    # adjusts first layer weights
    for i in range(hidden_layer_size*9):
        d_error_by_d_weight = 0
        if i // 9 != hidden_layer_size:
            d_output_of_node_by_d_node_net_value = \
            hidden_layer[i % hidden_layer_size]*(1 - hidden_layer[i % hidden_layer_size])
        else:
            d_output_of_node_by_d_node_net_value = \
            bias * (1 - bias)
        d_node_net_value_by_d_weight = input_layer[i // Global_variables.hidden_layer_size]
        for j in range(9):
            d_output_node_net_value_by_d_output_of_node = layer_2_weights[str(i // 9) + str(j)]
            d_output_node_by_d_output_node_net_value = output_layer[j]*(1 - output_layer[j])
            if actual_move[0] * 3 + actual_move[1] == i:
                d_error_by_d_output_node = output_layer[j] - 1
            else:
                d_error_by_d_output_node = output_layer[j]
                d_error_by_d_weight += d_output_of_node_by_d_node_net_value * d_node_net_value_by_d_weight * \
                d_output_node_net_value_by_d_output_of_node * d_output_node_by_d_output_node_net_value * \
                d_error_by_d_output_node
        new_l1_weights[str(i // hidden_layer_size) + str(i % hidden_layer_size)] = \
            layer_1_weights[str(i // hidden_layer_size) + str(i % hidden_layer_size)] - \
            d_error_by_d_weight * learning_rate

    network_file = open("neural network", "w")
    line = ""
    for i in range(9 * hidden_layer_size):
        line += str(new_l1_weights[str(i // hidden_layer_size) + str(i % hidden_layer_size)]) + " "
    network_file.write(line + "\n")
    line = ""
    for i in range(9 * (hidden_layer_size + 1)):
        line += str(new_l2_weights[str(i // 9) + str(i % 9)]) + " "
    network_file.write(line + "\n")
    network_file.write(str(bias))
    network_file.close()

    return total_error

def get_neural_network():
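    # reads the current weights back out of the text file written by make_up_neural_net/train_neural_network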
    layer_1_weights = {}
    layer_2_weights = {}

    # opens text file holding neural network
    network_file = open("neural network", "r")
    network = network_file.readlines()
    network_file.close()

    # gets the neural network weights from the text file
    weight_list = network[0].split()
    for i in range(len(weight_list)):
        layer_1_weights[str(i // Global_variables.hidden_layer_size) + str(i % Global_variables.hidden_layer_size)] = float(weight_list[i])
    weight_list = network[1].split()
    for i in range(len(weight_list)):
        layer_2_weights[str(i // 9) + str(i % 9)] = float(weight_list[i])
    bias = 1
    return layer_1_weights, layer_2_weights, bias

def make_up_neural_net():
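    # writes a fresh set of random starting weights to the text file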
    network_file = open("neural network", "w")
    line = ""
    for i in range(9*Global_variables.hidden_layer_size):
        line += str(random.random()) + " "
    network_file.write(line + "\n")
    line = ""
    for i in range(9*(Global_variables.hidden_layer_size + 1)):
        line += str(random.random()) + " "
    network_file.write(line + "\n")
    network_file.write(str(random.random()))
    network_file.close()

def main():
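    # plays batches of 100 self-play games, training on the moves of the winning side and un-training on the other side's moves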
    error = 0
    make_up_neural_net()
    for i in range(100):
        for j in range(100):
            game_over = False
            winner = ""
            player_to_move = "X"
            board = set_up_board()
            o_moves = []
            x_moves = []
            while game_over is False:
                if player_to_move == "X":
                    game_over, player_to_move, move, input_layer, hidden_layer, output_layer = bot_go(player_to_move, game_over, board)
                    x_moves.append([move, input_layer, hidden_layer, output_layer])
                    if game_over is True:
                        winner = "X"
                else:
                    game_over, player_to_move, move, input_layer, hidden_layer, output_layer = bot_go(player_to_move, game_over, board)
                    o_moves.append([move, input_layer, hidden_layer, output_layer])
                    if game_over is True:
                        winner = "O"
            if winner == "X":
                for move in x_moves:
                    error = train_neural_network(move[1], move[2], move[3], move[0])
                for move in o_moves:
                    error = un_train_neural_network(move[1], move[2], move[3], move[0])
            else:
                for move in o_moves:
                    error = train_neural_network(move[1], move[2], move[3], move[0])
                for move in x_moves:
                    error = un_train_neural_network(move[1], move[2], move[3], move[0])

        print(error)


main()

I would expect this code to print the result of the loss function after every 100 self-play games, and for this value to decrease over time. However, it tends to flatten out at a value of at least 0.45, whereas I believe it should be several orders of magnitude smaller (I was getting a loss on the order of 10^-5 when I was training the network on my own games against it).
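To put that number in context with the squared-error loss used in train_neural_network (target 1 for the played square, 0 for the other eight): a network that outputs roughly 0.5 for every square scores about 8 × 0.5 × 0.5² + 0.5 × 0.5² ≈ 1.13, while one that puts about 0.95 on the played square and 0.05 on the rest scores about 0.5 × 0.05² + 8 × 0.5 × 0.05² ≈ 0.011, so plateauing at 0.45 means the outputs are still far from confident predictions.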

I think I'm justified in my view because it is also crap at noughts and crosses when it plays.

I was wondering if this is because there's a problem with my code or because the neural network is not complex enough to model the problem and requires another layer.

NOTE: Sorry about the quantity of code, but I couldn't really find a way to shorten it. I have removed the win/draw checks to shorten it, as well as the "untrain" function, which is just the train function but with the learning rate multiplied by the derivative added to each weight instead of subtracted (roughly as sketched below). I can add them back if anyone wants to test the code without the inconvenience of writing those functions themselves.
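(In other words, where train_neural_network updates every weight with

weight = weight - learning_rate * d_error_by_d_weight

the untrain function instead does

weight = weight + learning_rate * d_error_by_d_weight

pushing the weights away from the losing side's moves rather than towards them.)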
