I coded a Hanoi towers program which print each step of the game
#Hanoi Towers game
n = 3 # number of disks
beg = [ n-i for i in range(n)]
mid = []
end = []
towers = [beg, mid, end]
def move(beg, end) :
summit_beg = 0
summit_end = 0
if len(towers[beg]) > 0 :
summit_beg = towers[beg][len(towers[beg]) - 1]
if summit_beg > summit_end and summit_end != 0 :
print("err: summit", beg, ">summit", end, "Gameover")
else:
towers[beg].pop(len(towers[beg]) - 1)
towers[end].append(summit_beg)
print(towers) ; print()
def hanoi(n, beg = 0, mid = 1, end = 2):
if n > 0 :
hanoi(n - 1, beg, end, mid)
move(beg, end)
hanoi(n - 1, mid, beg, end)
hanoi(n)
out :
[[3, 2], [], [1]]
[[3], [2], [1]]
[[3], [2, 1], []]
[[], [2, 1], [3]]
[[1], [2], [3]]
[[1], [], [3, 2]]
[[], [], [3, 2, 1]]
Now, for a reinforcement learning project, I need to create a states-space list of that each steps of the game, which so is a list to fill in as my learning agent go encounting each states of the game (so lists of the Hanoi disks 3, 2, 1).
So I want finally to output a list like this :
Space = [ [[3, 2], [], [1]] ,
[[3], [2], [1]] ,
[[3], [2, 1], []] ,
[[], [2, 1], [3]] ,
[[1], [2], [3]] ,
[[1], [], [3, 2]] ,
[[], [], [3, 2, 1]] ]
Let's observe a test code
space = []
temp = [[] for i in range(4)]
j = -1
print(j)
temp[j] = []
print(temp[j])
space.append(temp[j])
print(space) ; print()
j += 1
print(j)
temp[j] = [0]
print(temp[j])
space.append(temp[j])
print(space) ; print()
j += 1
print(j)
temp[j] = [1]
print(temp[j])
space.append(temp[j])
print(space) ; print()
j += 1
print(j)
temp[j] = [2]
print(temp[j])
space.append(temp[j])
print(space)
out :
-1
[]
[[]]
0
[0]
[[], [0]]
1
[1]
[[], [0], [1]]
2
[2]
[[], [0], [1], [2]]
When you examine this code you observe that the "space" list fills up as the counter j increases, and this with lists that are different from each other.
However if I inject the same process in the following program (so in the previous Hanoi towers code):
#Towers of Hanoi
n = 3
Beg = [ n - i for i in range(n)]
Mid = []
End = []
Towers = [ Beg, Mid, End]
Space = [ [[ n - i for i in range(n)], [], []] ]
Temp = [ [] for i in range(n**3)]
j = 0
def move(Beg, End):
summit_B = 0
summit_E = 0
if len(Towers[Beg]) > 0 :
summit_B = Towers[Beg][len(Towers[Beg]) - 1]
if len(Towers[End]) > 0 :
summit_E = Towers[End][len(Towers[End]) - 1]
if summit_B > summit_E and summit_E != 0 :
print("Error")
else :
global j # global not forgotten
Towers[Beg].pop(len(Towers[Beg]) - 1)
Towers[End].append(summit_B)
print(Towers) ; print() # The list Towers change at each step
print(j) ; print() # counter increase
Temp[j] = Towers
print(Temp[j]) ; print() ; print() # We check that we place Towers
# (each time a different list) to a variable also incremented
# different each time
Space.append(Temp[j]) # The list is therefore supposed to be filled as
# measurement with each different turns (like in the test code)
print(Space) ; print() ; print() ; print()
# BUT we end up with (j - 1) times the same Towers list at each increment (corresponding the list in the step of the game)
j += 1
def hanoi(n, Beg = 0, Mid = 1, End = 2):
if n > 0 :
hanoi(n - 1, Beg, End, Mid)
move(Beg, End)
hanoi(n - 1, Mid, Beg, End)
hanoi(n)
Out :
[[3, 2], [], [1]] # first game step (list Towers)
0 # verification of the counter j
[[3, 2], [], [1]] # verification of the variable Temp[j] logically increased, which contains the Towers
# Space :
[[[3, 2, 1], [], []], [[3, 2], [], [1]]] # so far so good with .append( Temp[0])
[[3], [2], [1]] # next state of the towers of Hanoi game
1 # same thing : counter verified
[[3], [2], [1]] # Same thing: the NEW variable Temp[j = 1] contains the new step-state of the game (the list Towers is refreshed), it's supposed to be verified.
# print(Space.append(Temp[1] = Towers) ):
[[[3, 2, 1], [], []], [[3], [2], [1]], [[3], [2], [1]]] # ?? 2 times the #same Towers list at step 3... ?
# not expected ..
# For me, we was supposed to obtain :
# "[[[3, 2, 1], [], []], [[3, 2], [], [1]], [[3], [2], [1]]]"
... then same problem
... then same problem
What can I try next?