I am trying to implement a sequence-to-sequence model using TensorFlow. If I understand correctly, in sequence to sequence the last context state vector of the encoder is passed to the decoder while producing the target sentence. I tried to do the same but cannot get around this shape mismatch error.
Below shows the implementation for it.
import random
import tensorflow as tf
class Encoder(tf.keras.Model):
def __init__(self, source_vocabulary, embedding_dimension, hidden_units):
super(Encoder, self).__init__()
self.source_vocabulary = source_vocabulary
self.embedding_dimension = embedding_dimension
self.hidden_units = hidden_units
# Embedding layer
self.embedding = tf.keras.layers.Embedding(self.source_vocabulary, self.embedding_dimension)
# LSTM layer
self.lstm = tf.keras.layers.LSTM(self.hidden_units, return_state=True)
def call(self, input_vector):
# input_vector = (batch_size, max_len) --> batch_size = # sentences, max_len = # words in sentence
input_vector = self.embedding(input_vector)
# input_vector = (batch_size, max_len, embedding_out) --> For every word in a sentence, we have the dimension
# of embedding out.
output, hidden_state, cell_state = self.lstm(input_vector)
return output, hidden_state, cell_state
class Decoder(tf.keras.Model):
def __init__(self, target_vocab, embedding_dimension, output_dimension, hidden_units):
super(Decoder, self).__init__()
self.hidden_units = hidden_units
self.output_dimension = output_dimension
self.embedding_dimension = embedding_dimension
self.target_vocab = target_vocab
# Embedding
self.embedding = tf.keras.layers.Embedding(self.target_vocab, self.embedding_dimension)
self.lstm = tf.keras.layers.LSTM(self.hidden_units, return_state=True)
self.fc = tf.keras.layers.Dense(output_dimension)
def call(self, input_vector, hidden_state, cell_state):
input_vector = self.embedding(input_vector)
output, hidden_state, cell_state = self.lstm(input_vector, initial_state=[hidden_state, cell_state])
prediction = self.fc(output)
return prediction, hidden_state, cell_state
class Sequence2SequenceModel(tf.keras.Model):
def __init__(self, encoder, decoder, output_vocab):
super(Sequence2SequenceModel, self).__init__()
self.encoder = encoder
self.decoder = decoder
self.out_vocab = output_vocab
def call(self, input_vector, target_vector, teacher_force_ratio=0.5):
batch_size = input_vector.shape[0]
target_length = target_vector.shape[1]
target_vocab = self.out_vocab
outputs = torch.zeros(size=(batch_size, target_length, target_vocab))
_, hidden, cell = self.encoder(input_vector)
# Grab the start token
X = target_vector[:, 0]
for idx in range(1, target_length):
# Since for the very first epoch, X will contain just the start token. Its shape
# will be (shape, ) --> Convert it to (1, shape) saying one batch of first characters
X = tf.expand_dims(X, axis=0)
output, hidden, cell = self.decoder(X, hidden, cell)
outputs[idx] = output
best_guess = tf.argmax(output, axis=1)
X = target_vector[idx] if random.random() < teacher_force_ratio else best_guess
return outputs
if __name__ == '__main__':
input_vector = tf.random.uniform(shape=(64, 128), minval=0, maxval=1000, dtype=tf.int32)
target_vector = tf.random.uniform(shape=(64, 128), minval=0, maxval=1000, dtype=tf.int32)
encoder = Encoder(source_vocabulary=1000, embedding_dimension=256, hidden_units=64)
decoder = Decoder(target_vocab=1000, embedding_dimension=256, hidden_units=64, output_dimension=1000)
model = Sequence2SequenceModel(encoder=encoder, decoder=decoder, output_vocab=1000)
print(model.call(input_vector, target_vector))
The error is as follows:
Traceback (most recent call last):
File "/home/gopudfki/anaconda3/envs/tf2_deepai/lib/python3.9/site-packages/keras/utils/traceback_utils.py", line 67, in error_handler
raise e.with_traceback(filtered_tb) from None
File "/home/gopudfki/Dokumente/Important Materials/Practise and Revision/Coding and Interview/Compition Programm/python/FrameWork Tutorial/TF2 Tutorial/Projects/Sequence2SequenceWIthoutAttention/Model.py", line 44, in call
output, hidden_state, cell_state = self.lstm(input_vector, initial_state=[hidden_state, cell_state])
ValueError: Exception encountered when calling layer "lstm_1" (type LSTM).
Tensor's shape (1, 64, 64) is not compatible with supplied shape [1, 1, 64].
Call arguments received by layer "lstm_1" (type LSTM):
• inputs=tf.Tensor(shape=(1, 64, 256), dtype=float32)
• mask=None
• training=None
• initial_state=['tf.Tensor(shape=(64, 64), dtype=float32)', 'tf.Tensor(shape=(64, 64), dtype=float32)']
python-BaseException