i am trying to learn DQN agent to play Tic Tac Toe using Keras. Issue is that my output has different shape than I expected.
Details:
Input shape : (BOARD_SIZE ^ 2) * 3
--> It is one hot encoded game board
Output shape: I expect that output will be list with size of (BOARD_SIZE^2)
because it should number of available actions
Problem:
Output has shape Size of input layer [(BOARD_SIZE ^ 2) *3] * Number of actions (BOARD_SIZE^2)
I tried to look for solutions but Keras documentation is quite poor. Plz help
THIS IS MY MODEL
def create_model(self, game: GameController) -> Sequential:
input_size = (game.shape ** 2) * 3
model = Sequential()
model.add(Dense(input_size, input_dim=1, activation='relu'))
model.add(Dense(int(input_size / 2), activation='relu'))
model.add(Dense(int(input_size / 2), activation='relu'))
model.add(Dense((game.shape ** 2), activation='linear'))
model.compile(loss="mean_squared_error", optimizer=Adam(self.alpha))
return model
THIS IS HOW I AM TRYING TO GET OUTPUT
q_values = self.model.predict(processed_input)
THIS IS BOAD PREPROCESSING (ONE HOT ENCODING)
def preprocess_input(self, game: GameController) -> list:
encoded_x = copy.deepcopy(game.board)
encoded_o = copy.deepcopy(game.board)
encoded_blank = copy.deepcopy(game.board)
for row in range(game.shape):
for col in range(game.shape):
if encoded_x[row][col] == 'X':
encoded_x[row][col] = 1
else:
encoded_x[row][col] = 0
if encoded_o[row][col] == 'O':
encoded_o[row][col] = 1
else:
encoded_o[row][col] = 0
if encoded_blank[row][col] == '-':
encoded_blank[row][col] = 1
else:
encoded_blank[row][col] = 0
chained_x = list(chain.from_iterable(encoded_x))
chained_o = list(chain.from_iterable(encoded_o))
chained_blank = list(chain.from_iterable(encoded_blank))
string_board = list(chain(chained_x, chained_o, chained_blank))
board_to_int = [int(element) for element in string_board]
return board_to_int