Shape Mismatch: Tensorflow Implementation for Sequence to Sequence models from scratch

Question

I am trying to implement a sequence-to-sequence model using TensorFlow. If I understand correctly, in sequence to sequence the last context state vector of the encoder is passed to the decoder while producing the target sentence. I tried to do the same but cannot get around this shape mismatch error.

Below shows the implementation for it.

import random
import tensorflow as tf


class Encoder(tf.keras.Model):
    def __init__(self, source_vocabulary, embedding_dimension, hidden_units):
        super(Encoder, self).__init__()
        self.source_vocabulary = source_vocabulary
        self.embedding_dimension = embedding_dimension
        self.hidden_units = hidden_units

        # Embedding layer
        self.embedding = tf.keras.layers.Embedding(self.source_vocabulary, self.embedding_dimension)

        # LSTM layer
        self.lstm = tf.keras.layers.LSTM(self.hidden_units, return_state=True)

    def call(self, input_vector):
        # input_vector = (batch_size, max_len) --> batch_size = # sentences, max_len = # words in sentence
        input_vector = self.embedding(input_vector)
        # input_vector = (batch_size, max_len, embedding_out) --> For every word in a sentence, we have the dimension
        # of embedding out.
        output, hidden_state, cell_state = self.lstm(input_vector)
        return output, hidden_state, cell_state


class Decoder(tf.keras.Model):
    def __init__(self, target_vocab, embedding_dimension, output_dimension, hidden_units):
        super(Decoder, self).__init__()
        self.hidden_units = hidden_units
        self.output_dimension = output_dimension
        self.embedding_dimension = embedding_dimension
        self.target_vocab = target_vocab

        # Embedding
        self.embedding = tf.keras.layers.Embedding(self.target_vocab, self.embedding_dimension)
        self.lstm = tf.keras.layers.LSTM(self.hidden_units, return_state=True)
        self.fc = tf.keras.layers.Dense(output_dimension)

    def call(self, input_vector, hidden_state, cell_state):
        input_vector = self.embedding(input_vector)
        output, hidden_state, cell_state = self.lstm(input_vector, initial_state=[hidden_state, cell_state])
        prediction = self.fc(output)
        return prediction, hidden_state, cell_state


class Sequence2SequenceModel(tf.keras.Model):
    def __init__(self, encoder, decoder, output_vocab):
        super(Sequence2SequenceModel, self).__init__()
        self.encoder = encoder
        self.decoder = decoder
        self.out_vocab = output_vocab

    def call(self, input_vector, target_vector, teacher_force_ratio=0.5):
        batch_size = input_vector.shape[0]
        target_length = target_vector.shape[1]
        target_vocab = self.out_vocab

        outputs = torch.zeros(size=(batch_size, target_length, target_vocab))

        _, hidden, cell = self.encoder(input_vector)

        # Grab the start token
        X = target_vector[:, 0]

        for idx in range(1, target_length):
            # Since for the very first epoch, X will contain just the start token. Its shape
            # will be (shape, ) --> Convert it to (1, shape) saying one batch of first characters

            X = tf.expand_dims(X, axis=0)
            output, hidden, cell = self.decoder(X, hidden, cell)

            outputs[idx] = output
            best_guess = tf.argmax(output, axis=1)

            X = target_vector[idx] if random.random() < teacher_force_ratio else best_guess

        return outputs


if __name__ == '__main__':
    input_vector = tf.random.uniform(shape=(64, 128), minval=0, maxval=1000, dtype=tf.int32)
    target_vector = tf.random.uniform(shape=(64, 128), minval=0, maxval=1000, dtype=tf.int32)

    encoder = Encoder(source_vocabulary=1000, embedding_dimension=256, hidden_units=64)
    decoder = Decoder(target_vocab=1000, embedding_dimension=256, hidden_units=64, output_dimension=1000)

    model = Sequence2SequenceModel(encoder=encoder, decoder=decoder, output_vocab=1000)
    print(model.call(input_vector, target_vector))

The error is as follows:

    Traceback (most recent call last):
  File "/home/gopudfki/anaconda3/envs/tf2_deepai/lib/python3.9/site-packages/keras/utils/traceback_utils.py", line 67, in error_handler
    raise e.with_traceback(filtered_tb) from None
  File "/home/gopudfki/Dokumente/Important Materials/Practise and Revision/Coding and Interview/Compition Programm/python/FrameWork Tutorial/TF2 Tutorial/Projects/Sequence2SequenceWIthoutAttention/Model.py", line 44, in call
    output, hidden_state, cell_state = self.lstm(input_vector, initial_state=[hidden_state, cell_state])
ValueError: Exception encountered when calling layer "lstm_1" (type LSTM).

Tensor's shape (1, 64, 64) is not compatible with supplied shape [1, 1, 64].

Call arguments received by layer "lstm_1" (type LSTM):
  • inputs=tf.Tensor(shape=(1, 64, 256), dtype=float32)
  • mask=None
  • training=None
  • initial_state=['tf.Tensor(shape=(64, 64), dtype=float32)', 'tf.Tensor(shape=(64, 64), dtype=float32)']
python-BaseException

Shape Mismatch: Tensorflow Implementation for Sequence to Sequence models from scratch

0 Answers0