I have an array self.N
of ints, and I'm trying to write self.N[node] +=1
, but whenever I just write self.N[node]
it gives me a value error for having more than one element, which it can't.
def __init__(self, exploration_weight=1):
self.Q = defaultdict(int) # total reward of each node
self.N = defaultdict(int) # total visit count for each node
self.children = dict() # children of each node
def do_rollout(self, node, player):
"Make the tree one layer better. (Train for one iteration.)"
print("in rollout")
"Find an unexplored descendent of `node`"
path = self._select(node)
leaf = path[-1]
"Update the `children` dict with the children of `node`"
if leaf not in self.children:
self.children[node] = node.find_children()
"Send the reward back up to the ancestors of the leaf"
for node in reversed(path):
self.N[node] += 1
def _select(self, node):
"Find an unexplored descendent of `node`"
path = []
while True:
path.append(node)
if node not in self.children or not self.children[node]:
# node is either unexplored or terminal
return path
unexplored = self.children[node] - self.children.keys()
if unexplored:
n = unexplored.pop()
path.append(n)
return path
node = self.children[node] # descend a layer deeper
I expect that self.N[node] += 1
will simply increment the value of self.N
at index node
I've found in the debugger something seems to be wrong with the keys, but I don't know what.
(Pdb) self.children[leaf]
turn:2
| | | | | | | |
| | | | | | | |
| | | | | | | |
| | | | | | | |
| | | | | | | |
| | | | | | X | O |
_ _ _ _ _ _ _
0 1 2 3 4 5 6
*** KeyError: turn: 2, done False, winner: None
and the node comparator seems not to be working either, but I don't know why:
I've also found that just before it raises the error, the debugger says:
(pdb) p self.children.keys()
*** TypeError: 'Node' object is not iterable
even though it clearly worked until this point
Traceback (most recent call last):
File "test_MCTS.py", line 52, in <module>
agent_wins += play_bot()
File "test_MCTS.py", line 18, in play_bot
tree.do_rollout(board, 0) # player 0 is 2nd
File "/Users/TorSaxberg/.../MCTS_minimal.py", line 50, in do_rollout
self.N[node] += 1
ValueError: The truth value of an array with more than one element is ambiguous. Use a.any() or a.all()
board is a node containing a [6,7] array and methods to fill it
a minimum example:
from random import randint
from collections import defaultdict
Q = defaultdict(int) # total reward of each node
N = defaultdict(int) # total visit count for each node
children = dict() # children of each node
def do_rollout(num):
"Make the tree one layer better. (Train for one iteration.)"
print("in rollout")
"Find an unexplored descendent of `node`"
path = _select(num)
leaf = path[-1]
"Update the `children` dict with the children of `node`"
if leaf not in children: # a dict()
children[num] = randint(0,5)
"Send the reward back up to the ancestors of the leaf"
for num in reversed(path):
N[num] += 1 # a dict()
def _select(num):
"Find an unexplored descendent of `node`"
path = []
while True:
path.append(num)
if num not in children or not children[num]:
return path
breakpoint()
unexplored = children[num] - children.keys() # a set()
if unexplored:
n = unexplored.pop()
path.append(n)
return path
# descend a layer deeper
num = children[randint(0,5)]
num = randint(0,5)
for _ in range(10):
do_rollout(num)
but i cant get past another TyepError to show the error above
Traceback (most recent call last):
File "test_ValueError.py", line 43, in <module>
do_rollout(num)
File "test_ValueError.py", line 14, in do_rollout
path = _select(num)
File "test_ValueError.py", line 33, in _select
unexplored = children[num] - children.keys() # a set()
TypeError: 'int' object is not iterable
it's weird because node isn't iterable either (from debugging)
TypeError: 'Node' object is not iterable