#Evaluating agents performance
total_epochs, total_penalties = 0, 0
episodes = 1
for _ in range(episodes):
state = env.reset()
epochs, penalties, reward = 0 ,0 ,0
done = False
while not done:
state, reward, done, info = env.step(np.argmax(q_table[state]))
if reward == -10:
penalties += 1
epochs += 1
total_penalties += penalties
total_epochs += epochs
print(f"Results after {episodes} episodes")
print(f"Averge Timestep: {total_epochs / episodes}")
print(f"Average penalties: {total_penalties / episodes}")
The execution is forever struck on line {state, reward, done, info = env.step(np.argmax(q_table[state])}
#specifically on np.argmax part
i tried breaking down the code and its working fine individually,
the np.argmax(q_table[state]) line if wroking as a standalone but as soon as i put it in while loop the exection is struck forever there.