I am currently taking cs188 courses offered by UCB. I encountered the problem when implementing the alpha-beta pruning algorithm. Two different ways of implementation is showed below.
# this one works
def max_value(self, gamestate, depth, agentIndex, alpha, beta):
v_max = float('-inf')
max_action = ''
# get the possible actions from gamestate
legal_actions = gamestate.getLegalActions(agentIndex)
for action in legal_actions:
# get the successor corresponding to each action
successor = gamestate.generateSuccessor(agentIndex, action)
successor_value = self.value(successor, depth, agentIndex+1, alpha, beta)
# get the maximum value
if successor_value[0] > v_max:
v_max = successor_value[0]
max_action = action
# pruning
if v_max > beta:
return (v_max, max_action)
alpha = max(v_max, alpha)
return (v_max, max_action)
# this one fails to prune some nodes that should be pruned
def max_value(self, gamestate, depth, agentIndex, alpha, beta):
v_max = float('-inf')
max_action = ''
# get the possible actions from gamestate
legal_actions = gamestate.getLegalActions(agentIndex)
# get the successor corresponding to each action
successors = []
for action in legal_actions:
successors.append(gamestate.generateSuccessor(agentIndex, action))
for i in range(len(successors)):
successor_value = self.value(successors[i], depth, agentIndex+1, alpha, beta)
# get the maximum value
if successor_value[0] > v_max:
v_max = successor_value[0]
max_action = legal_actions[i]
# pruning
if v_max > beta:
return (v_max, '')
alpha = max(v_max, alpha)
return (v_max, max_action)
what's the difference between them. I guess there's something wrong with the alpha-beta updating in the second implementation, but I failed to locate the bug.