Iterative deepening with time limit, minimax algorithm with alpha beta pruning and heuristics

Question

I am trying to implement the game Othello and I encountered some problems. AI is not making the best possible moves and keeps losing.

I tried adding some print statements. It seems like evaluation of every valid move results in the same score, so AI always chooses the last move from list of valid moves to make. Here are the relevant functions

def computer_move(self, board, player=1):
        start_time = time.time()
        best_move = None
        best_score = float("-inf")
        for move in self.get_valid_moves(board, player):
            if time.time() - start_time >= 3:
                break
            score = self.iterative_deepening(board, 3)
            print("skorovi u iteraciji:" + str(score))
            if score >= best_score:
                best_score = score
                best_move = move
                print("ako je veci od best_score u iteraciji:" + str(best_score))
                print(best_move)
        print("best_score izvan iteracije izvan iteracije:" + str(best_score))
        print("best_move:" + str(best_move))
        return best_move

    def alpha_beta_minimax(self, board, depth, alpha, beta, maximizing_player, start_time):
        if depth == 0 or self.is_game_over(board):
            return self.evaluate_board(board)

        if maximizing_player:
            max_eval = float("-inf")   
            valid_moves = self.get_valid_moves(board, 1)    
            for move in valid_moves:
                new_board = self.copy_board(board)
                self.make_move(new_board, move, 1)
                evaluation = self.alpha_beta_minimax(new_board, depth - 1, alpha, beta, False, start_time)  
                max_eval = max(max_eval, evaluation)
                alpha = max(alpha, evaluation)
                if beta <= alpha:
                    break 
            return max_eval
        else:  
            min_eval = float("inf")   
            valid_moves = self.get_valid_moves(board, 2)
            for move in valid_moves:
                new_board = self.copy_board(board)
                self.make_move(new_board, move, 2)
                evaluation = self.alpha_beta_minimax(new_board, depth - 1, alpha, beta, True, start_time)
                min_eval = min(min_eval, evaluation)
                beta = min(beta, evaluation)
                if beta <= alpha:
                    break  # alpha cutoff
            return min_eval

    def iterative_deepening(self, board, time_limit):
        start_time = time.time()
        max_depth = 8
        alpha = float("-inf")
        beta = float("inf")
        depth = 0
        best_score = float("-inf")
        while depth <= max_depth:
            if time.time() - start_time >= time_limit:
                break
            score = self.alpha_beta_minimax(board, depth, alpha, beta, True, start_time)
            if score > best_score:
                best_score = score
            print(depth)
            depth += 1
        return best_score

    def evaluate_board(self, board):

        my_tiles = 0
        opp_tiles = 0
        my_front_tiles = 0
        opp_front_tiles = 0
        p = 0   
        c = 0   
        l = 0   
        m = 0   
        f = 0   
        d = 0   

        v = [
            [20, -3, 11, 8, 8, 11, -3, 20],
            [-3, -7, -4, 1, 1, -4, -7, -3],
            [11, -4, 2, 2, 2, 2, -4, 11],
            [8, 1, 2, -3, -3, 2, 1, 8],
            [8, 1, 2, -3, -3, 2, 1, 8],
            [11, -4, 2, 2, 2, 2, -4, 11],
            [-3, -7, -4, 1, 1, -4, -7, -3],
            [20, -3, 11, 8, 8, 11, -3, 20]
        ]

        x1 = [-1, -1, 0, 1, 1, 1, 0, -1]
        y1 = [0, 1, 1, 1, 0, -1, -1, -1]

        #  frontier disks, and disk squares
        for i in range(8):
            for j in range(8):
                if board[i][j] == 1:  
                    d += v[i][j]
                    my_tiles += 1
                elif board[i][j] == 2:
                    d -= v[i][j]
                    opp_tiles += 1
                if board[i][j] != 0:
                    for k in range(8):
                        x = i + x1[k]
                        y = j + y1[k]
                        if 8 > x >= 0 and 8 > y >= 0 and board[x][y] == 0:
                            if board[i][j] == 1:
                                my_front_tiles += 1
                            else:
                                opp_front_tiles += 1
                            break

        
        if my_tiles > opp_tiles:
            p = (100.0 * my_tiles) / (my_tiles + opp_tiles)
        elif my_tiles < opp_tiles:
            p = -(100.0 * opp_tiles) / (my_tiles + opp_tiles)
        else:
            p = 0

        
        if my_front_tiles > opp_front_tiles:
            f = -(100.0 * my_front_tiles) / (my_front_tiles + opp_front_tiles)
        elif my_front_tiles < opp_front_tiles:
            f = (100.0 * opp_front_tiles) / (my_front_tiles + opp_front_tiles)
        else:
            f = 0

        
        my_tiles = opp_tiles = 0
        if board[0][0] == 1:   # ako pripada kompjuteru
            my_tiles += 1
        elif board[0][0] == 2:
            opp_tiles += 1
        if board[0][7] == 1:
            my_tiles += 1
        elif board[0][7] == 2:
            opp_tiles += 1
        if board[7][0] == 1:
            my_tiles += 1
        elif board[7][0] == 2:
            opp_tiles += 1
        if board[7][7] == 1:
            my_tiles += 1
        elif board[7][7] == 2:
            opp_tiles += 1
        c = 25 * (my_tiles - opp_tiles)

        
        my_tiles = opp_tiles = 0
        if board[0][0] == 0:
            if board[0][1] == 1:
                my_tiles += 1
            elif board[0][1] == 2:
                opp_tiles += 1
            if board[1][1] == 1:
                my_tiles += 1
            elif board[1][1] == 2:
                opp_tiles += 1
            if board[1][0] == 1:
                my_tiles += 1
            elif board[1][0] == 2:
                opp_tiles += 1
        if board[0][7] == 0:
            if board[0][6] == 1:
                my_tiles += 1
            elif board[0][6] == 2:
                opp_tiles += 1
            if board[1][6] == 1:
                my_tiles += 1
            elif board[1][6] == 2:
                opp_tiles += 1
            if board[1][7] == 1:
                my_tiles += 1
            elif board[1][7] == 2:
                opp_tiles += 1
        if board[7][0] == 0:
            if board[7][1] == 1:
                my_tiles += 1
            elif board[7][1] == 2:
                opp_tiles += 1
            if board[6][1] == 1:
                my_tiles += 1
            elif board[6][1] == 2:
                opp_tiles += 1
            if board[6][0] == 1:
                my_tiles += 1
            elif board[6][0] == 2:
                opp_tiles += 1
        if board[7][7] == 0:
            if board[6][7] == 1:
                my_tiles += 1
            elif board[6][7] == 2:
                opp_tiles += 1
            if board[6][6] == 1:
                my_tiles += 1
            elif board[6][6] == 2:
                opp_tiles += 1
            if board[7][6] == 1:
                my_tiles += 1
            elif board[7][6] == 2:
                opp_tiles += 1
        l = -12.5 * (my_tiles - opp_tiles)

        
        my_tiles = self.num_valid_moves(board, player=1)
        opp_tiles = self.num_valid_moves(board, player=2)
        if my_tiles > opp_tiles:
            m = (100.0 * my_tiles) / (my_tiles + opp_tiles)
        elif my_tiles < opp_tiles:
            m = -(100.0 * opp_tiles) / (my_tiles + opp_tiles)
        else:
            m = 0

        #  score
        score = (10 * p) + (801.724 * c) + (382.026 * l) + (78.922 * m) + (74.396 * f) + (10 * d)
        return score

Please trim your code to make it easier to find your problem. Follow these guidelines to create a [minimal reproducible example](https://stackoverflow.com/help/minimal-reproducible-example). — Community, May 24 '23 at 00:56

Iterative deepening with time limit, minimax algorithm with alpha beta pruning and heuristics

0 Answers0