0

I'm trying to build my first Tic-Tac-Toe game. I've modelled both players playing MiniMax strategy.The point is Players seem to pick moves that maximize their own utility correctly, but they won't stop the opponent from making 3 in a row. I can't explain this behavior, since I built the code such that each player considers opponent's success as their own negative utility. l'd appreciate any advice.

My main:


game = Tgame()
first_player = Minimax(game=game)
second_player = Minimax(game=game)

state = game.initial_state
moves = game.play(first_player, second_player)

My search strategy:

class Minimax:

    def __init__(self, game):
        self.game = game

#s,a are the state and the action used to reach it

    def max_value(self, state):
        if self.game.terminal_test(state):
            return self.game.player_utility(state)
        values = [self.min_value(s) for s, a in self.game.successors(state)]
        return max(values)

    def min_value(self, state):
        if self.game.terminal_test(state):
            return self.game.player_utility(state)
        values = [self.max_value(s) for s, a in self.game.successors(state)]
        return min(values)


    def next_move(self, state):
        moves = self.game.actions(state)
        return max(moves, key=lambda move: self.min_value(self.game.result(state, move)))

My Game:

import copy

class Game:
    def __init__(self, initial_state, player):
        self.initial_state = initial_state
        self.player = player

    def actions(self, state):
        return []

    def result(self, state, action):
        return action

    def successors(self, state):
        possible_actions = self.actions(state)
        return [(self.result(state, a), a) for a in possible_actions]

    def terminal_test(self, state):
        return False

    def utility(self, state):
        return 0


    def player_utility(self, state):
        if self.player == 'MAX':
            # for MAX player
            return self.utility(state)
        elif self.player == 'MIN':
            # for MIN player
            return -self.utility(state)
        else:
            raise ValueError


    def player_symbol(self):
        if self.player == 'MAX':
            return "x"
        elif self.player == 'MIN':
            return "o"
        else:
            raise ValueError


    def next_player(self):
        """
        Return the next player to move
        @return: MAX or MIN
        """
        if self.player == 'MAX':
            return 'MIN'
        else:
            return 'MAX'

    def play(self, player_one, player_two):

        state = self.initial_state
        players = [player_one, player_two]
        moves = []
        while True:
            for player in players:
                if self.terminal_test(state):
                    self.display(state)
                    print('----- GAME OVER -----\n\n')
                    return moves
                self.display(state)
                move = player.next_move(state)
                state = self.result(state, move)
                self.display_move(state, move)
                moves.append((move, self.player))
                self.player = self.next_player()
                print('_____________________')



    def display(self, state):
        print('_____________________')
        print(self.player)

    def display_move(self, state, move):
        print(self.player, f'--{move}--> ', state)
        self.printBoard(state)

    def printBoard(self,state):
        for row in state:
            print('\n')
            for col in row:
                print("[",col,"] ",end="")



class Tgame(Game):
    def __init__(self, initial_state=None, player='MAX'):
        super(Tgame, self).__init__(initial_state, player)
        self.initial_state = [["","",""],["","",""],["","",""]]
        self.player = player

    def actions(self, state):
        actions = []
        for i in range(0,3):
            for j in range(0,3):
                newstate = copy.deepcopy(state)
                if newstate[i][j] == "":
                    newstate[i][j] = self.player_symbol()
                    actions.append(newstate)
        return actions


    def terminal_test(self, state):

        #check for alignments
        if self.utility(state) in [-1, 1]:
            return True

        #check for fully complete board
        for row in state:
            for col in row:
                if col == "":
                    return False

        return True

    def utility(self, state):
        #check goal on rows
        for row in state:
            utility = all(row[j] == "x" for j in range(0, 3))
            if utility:
                return 1

        #check goal on columns
        for col in range(0, 3):
            utility=all(row[col] == "x" for row in state)
            if utility:
                return 1

        #check goal on diagonals
        if state[0][0] == state[1][1] == state[2][2] == "x" or state[0][2] == state[1][1] == state[2][0] == "x":
            return 1

        #check goal on rows
        for row in state:
            utility=all(row[j] == "o" for j in range(0, 3))
            if utility:
                return -1

        #check goal on columns
        for col in range(0, 3):
            utility=all(row[col] == "o" for row in state)
            if utility:
                return -1

        #check goal on diagonalals
        if state[0][0] == state[1][1] == state[2][2] == "o" or state[0][2] == state[1][1] == state[2][0] == "o":
            return -1
        return 0

user13860
  • 3
  • 2
  • What did you intend to do with `utility` always returning 0 -- ignoring the `state` argument -- and then doing `return -self.utility(state)`? This is certainly wrong. – trincot Apr 10 '23 at 21:50
  • @trincot utility 0 is returned at the end if no 3-alignment is found. A player's victory yields utility 1 whereas his opponent's one causes it to drop to -1. `-self.utility(state)` adapts the utility for the other player, since I modelled the game as a fixed-sum game – user13860 Apr 10 '23 at 22:00
  • This code is too confusing for me. There is `utility` as a method, then as a variable... and the name "utility" doesn't have any enlightening meaning to me, not to mention `s` and `a`. – trincot Apr 10 '23 at 22:11

0 Answers0