I want to train an Encoder-Decoder model that converts dates from a string format to a numeric format. For example, I want to convert "April 22, 2019" to "2019-04-22".
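(To pin down the exact mapping I'm after: the conversion itself is trivial to do deterministically with Python's datetime module, as in the throwaway helper below; the whole point is to have a model learn it.)
from datetime import datetime

def convert_date(date_string):
    # "April 22, 2019" -> "2019-04-22"
    return datetime.strptime(date_string, "%B %d, %Y").strftime("%Y-%m-%d")

print(convert_date("April 22, 2019"))  # prints 2019-04-22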
Here's the code that I used to create the dataset:
import numpy as np

months = {
    1: "January",
    2: "February",
    3: "March",
    4: "April",
    5: "May",
    6: "June",
    7: "July",
    8: "August",
    9: "September",
    10: "October",
    11: "November",
    12: "December"
}
def generate_dataset(num_examples, train_ratio, valid_ratio, test_ratio):
    train_set = []
    valid_set = []
    test_set = []
    longest_sequence_length = 19  # longest one is "September 29, 2021" + end-of-stream token
    PADDING_TOKEN = "<pad>"
    END_OF_STREAM_TOKEN = "<EOS>"
    for _ in range(0, int(num_examples * train_ratio)):
        random_year = np.random.randint(1, 2022)
        random_month = np.random.randint(1, 13)
        random_day = np.random.randint(1, 29)
        # I'm ignoring the fact that different months have different numbers of days in the line above
        random_date_string = [months[random_month], " ", str(random_day), ", ", str(random_year)]
        for _ in range(len(random_date_string), longest_sequence_length - 1):
            random_date_string = random_date_string + [PADDING_TOKEN]
        random_date_string = random_date_string + [END_OF_STREAM_TOKEN]
        # I also probably don't need an explicit cast to string here
        random_date_numeric = [str(random_year), "-", str(random_month), "-", str(random_day)]
        for _ in range(len(random_date_numeric), longest_sequence_length - 1):
            random_date_numeric = random_date_numeric + [PADDING_TOKEN]
        random_date_numeric = random_date_numeric + [END_OF_STREAM_TOKEN]
        train_set.append([random_date_string, random_date_numeric])
    for _ in range(int(num_examples * train_ratio), int(num_examples * train_ratio + num_examples * valid_ratio)):
        random_year = np.random.randint(1, 2022)
        random_month = np.random.randint(1, 13)
        random_day = np.random.randint(1, 29)
        random_date_string = [months[random_month], " ", str(random_day), ", ", str(random_year)]
        for _ in range(len(random_date_string), longest_sequence_length - 1):
            random_date_string = random_date_string + [PADDING_TOKEN]
        random_date_string = random_date_string + [END_OF_STREAM_TOKEN]
        random_date_numeric = [str(random_year), "-", str(random_month), "-", str(random_day)]
        for _ in range(len(random_date_numeric), longest_sequence_length - 1):
            random_date_numeric = random_date_numeric + [PADDING_TOKEN]
        random_date_numeric = random_date_numeric + [END_OF_STREAM_TOKEN]
        valid_set.append([random_date_string, random_date_numeric])
    for _ in range(int(num_examples * train_ratio + num_examples * valid_ratio), num_examples):
        random_year = np.random.randint(1, 2022)
        random_month = np.random.randint(1, 13)
        random_day = np.random.randint(1, 29)
        random_date_string = [months[random_month], " ", str(random_day), ", ", str(random_year)]
        for _ in range(len(random_date_string), longest_sequence_length - 1):
            random_date_string = random_date_string + [PADDING_TOKEN]
        random_date_string = random_date_string + [END_OF_STREAM_TOKEN]
        random_date_numeric = [str(random_year), "-", str(random_month), "-", str(random_day)]
        for _ in range(len(random_date_numeric), longest_sequence_length - 1):
            random_date_numeric = random_date_numeric + [PADDING_TOKEN]
        random_date_numeric = random_date_numeric + [END_OF_STREAM_TOKEN]
        test_set.append([random_date_string, random_date_numeric])
    return train_set, valid_set, test_set
train_set, valid_set, test_set = generate_dataset(10000, 0.7, 0.1, 0.2)
train_set = np.array(train_set)
valid_set = np.array(valid_set)
test_set = np.array(test_set)
Here's what a data entry looks like (train_set[0]):
array([['July', ' ', '6', ', ', '225', '<pad>', '<pad>', '<pad>',
'<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>',
'<pad>', '<pad>', '<pad>', '<EOS>'],
['225', '-', '7', '-', '6', '<pad>', '<pad>', '<pad>', '<pad>',
'<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>',
'<pad>', '<pad>', '<EOS>']], dtype='<U9')
Here's the shape of my training set (number of examples, [string sequence, numeric sequence], tokens per sequence):
(7000, 2, 19)
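As the dump above shows, everything is stored as strings. Since the Embedding layer I use further down expects integer token IDs rather than raw strings, I suspect I'll eventually need something like the following sketch (token_to_id, encode, and encoded_first_input are just names I made up, and this only covers tokens that actually occur in train_set, not e.g. <SOS>):
# Hypothetical sketch: map every distinct string token in train_set to an integer ID.
all_tokens = sorted({token for example in train_set for sequence in example for token in sequence})
token_to_id = {token: i for i, token in enumerate(all_tokens)}

def encode(sequence):
    # Turns one padded token sequence into an int32 array of token IDs.
    return np.array([token_to_id[token] for token in sequence], dtype=np.int32)

encoded_first_input = encode(train_set[0][0])
I'm not sure whether that's the right fix, though, which is partly why I'm asking.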
I then proceed to add the start-of-sequence tokens:
START_OF_SEQUENCE_TOKEN = "<SOS>"
train_set_with_start_of_sequence_token = []
for entry in train_set:
    new_entry_0 = np.insert(entry[0][:-1], 0, START_OF_SEQUENCE_TOKEN)
    new_entry_1 = np.insert(entry[1][:-1], 0, START_OF_SEQUENCE_TOKEN)
    new_entry = [new_entry_0, new_entry_1]
    train_set_with_start_of_sequence_token.append(np.array(new_entry))
train_set_with_start_of_sequence_token = np.array(train_set_with_start_of_sequence_token)
seq_lengths = np.full([7000], 19)  # one entry per training example; every sequence is 19 tokens long
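As an aside, I think the same insertion could be done without the Python loop; a minimal sketch, assuming the (7000, 2, 19) layout above (train_set_with_sos is just my own name for the result):
sos_column = np.full((train_set.shape[0], 2, 1), START_OF_SEQUENCE_TOKEN)
# Drop the last token of each sequence and prepend <SOS>, keeping the length at 19.
train_set_with_sos = np.concatenate([sos_column, train_set[:, :, :-1]], axis=2)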
Here's the Encoder-Decoder model:
import tensorflow as tf
from tensorflow import keras
import tensorflow_addons as tfa
encoder_inputs = keras.layers.Input(shape=[None], dtype=np.int32)
decoder_inputs = keras.layers.Input(shape=[None], dtype=np.int32)
sequence_lengths = keras.layers.Input(shape=[], dtype=np.int32)
vocab_size = 2021 * 12 * 23 # the total number of possible dates
embed_size = 512
embeddings = keras.layers.Embedding(vocab_size, embed_size)
encoder_embeddings = embeddings(encoder_inputs)
decoder_embeddings = embeddings(decoder_inputs)
encoder = keras.layers.LSTM(512, return_state=True)
encoder_outputs, state_h, state_c = encoder(encoder_embeddings)
encoder_state = [state_h, state_c]
sampler = tfa.seq2seq.sampler.TrainingSampler()
decoder_cell = keras.layers.LSTMCell(512)
output_layer = keras.layers.Dense(vocab_size)
decoder = tfa.seq2seq.basic_decoder.BasicDecoder(decoder_cell, sampler,
                                                 output_layer=output_layer)
final_outputs, final_state, final_sequence_lengths = decoder(
    decoder_embeddings, initial_state=encoder_state,
    sequence_length=sequence_lengths)
Y_proba = tf.nn.softmax(final_outputs.rnn_output)
model = keras.models.Model(
    inputs=[encoder_inputs, decoder_inputs, sequence_lengths],
    outputs=[Y_proba])
model.compile(loss="sparse_categorical_crossentropy", optimizer="adam")
history = model.fit([train_set[:, 1, :], train_set_with_start_of_sequence_token[:, 1, :], seq_lengths], train_set[:, 0, :], epochs=2)
When I try to call the model's fit method, I get the following error:
Train on 7000 samples
Epoch 1/2
32/7000 [..............................] - ETA: 35:46
---------------------------------------------------------------------------
UnimplementedError Traceback (most recent call last)
<ipython-input-19-e60cd6fd68df> in <module>
----> 1 history = model.fit([train_set[:, 1, :], train_set_with_start_of_sequence_token[:, 1, :], seq_lengths], train_set[:, 0, :], epochs=2)
~/anaconda3/envs/tf2/lib/python3.7/site-packages/tensorflow_core/python/keras/engine/training.py in fit(self, x, y, batch_size, epochs, verbose, callbacks, validation_split, validation_data, shuffle, class_weight, sample_weight, initial_epoch, steps_per_epoch, validation_steps, validation_freq, max_queue_size, workers, use_multiprocessing, **kwargs)
817 max_queue_size=max_queue_size,
818 workers=workers,
--> 819 use_multiprocessing=use_multiprocessing)
820
821 def evaluate(self,
~/anaconda3/envs/tf2/lib/python3.7/site-packages/tensorflow_core/python/keras/engine/training_v2.py in fit(self, model, x, y, batch_size, epochs, verbose, callbacks, validation_split, validation_data, shuffle, class_weight, sample_weight, initial_epoch, steps_per_epoch, validation_steps, validation_freq, max_queue_size, workers, use_multiprocessing, **kwargs)
340 mode=ModeKeys.TRAIN,
341 training_context=training_context,
--> 342 total_epochs=epochs)
343 cbks.make_logs(model, epoch_logs, training_result, ModeKeys.TRAIN)
344
~/anaconda3/envs/tf2/lib/python3.7/site-packages/tensorflow_core/python/keras/engine/training_v2.py in run_one_epoch(model, iterator, execution_function, dataset_size, batch_size, strategy, steps_per_epoch, num_samples, mode, training_context, total_epochs)
126 step=step, mode=mode, size=current_batch_size) as batch_logs:
127 try:
--> 128 batch_outs = execution_function(iterator)
129 except (StopIteration, errors.OutOfRangeError):
130 # TODO(kaftan): File bug about tf function and errors.OutOfRangeError?
~/anaconda3/envs/tf2/lib/python3.7/site-packages/tensorflow_core/python/keras/engine/training_v2_utils.py in execution_function(input_fn)
96 # `numpy` translates Tensors to values in Eager mode.
97 return nest.map_structure(_non_none_constant_value,
---> 98 distributed_function(input_fn))
99
100 return execution_function
~/anaconda3/envs/tf2/lib/python3.7/site-packages/tensorflow_core/python/eager/def_function.py in __call__(self, *args, **kwds)
566 xla_context.Exit()
567 else:
--> 568 result = self._call(*args, **kwds)
569
570 if tracing_count == self._get_tracing_count():
~/anaconda3/envs/tf2/lib/python3.7/site-packages/tensorflow_core/python/eager/def_function.py in _call(self, *args, **kwds)
630 # Lifting succeeded, so variables are initialized and we can run the
631 # stateless function.
--> 632 return self._stateless_fn(*args, **kwds)
633 else:
634 canon_args, canon_kwds = \
~/anaconda3/envs/tf2/lib/python3.7/site-packages/tensorflow_core/python/eager/function.py in __call__(self, *args, **kwargs)
2361 with self._lock:
2362 graph_function, args, kwargs = self._maybe_define_function(args, kwargs)
-> 2363 return graph_function._filtered_call(args, kwargs) # pylint: disable=protected-access
2364
2365 @property
~/anaconda3/envs/tf2/lib/python3.7/site-packages/tensorflow_core/python/eager/function.py in _filtered_call(self, args, kwargs)
1609 if isinstance(t, (ops.Tensor,
1610 resource_variable_ops.BaseResourceVariable))),
-> 1611 self.captured_inputs)
1612
1613 def _call_flat(self, args, captured_inputs, cancellation_manager=None):
~/anaconda3/envs/tf2/lib/python3.7/site-packages/tensorflow_core/python/eager/function.py in _call_flat(self, args, captured_inputs, cancellation_manager)
1690 # No tape is watching; skip to running the function.
1691 return self._build_call_outputs(self._inference_function.call(
-> 1692 ctx, args, cancellation_manager=cancellation_manager))
1693 forward_backward = self._select_forward_and_backward_functions(
1694 args,
~/anaconda3/envs/tf2/lib/python3.7/site-packages/tensorflow_core/python/eager/function.py in call(self, ctx, args, cancellation_manager)
543 inputs=args,
544 attrs=("executor_type", executor_type, "config_proto", config),
--> 545 ctx=ctx)
546 else:
547 outputs = execute.execute_with_cancellation(
~/anaconda3/envs/tf2/lib/python3.7/site-packages/tensorflow_core/python/eager/execute.py in quick_execute(op_name, num_outputs, inputs, attrs, ctx, name)
65 else:
66 message = e.message
---> 67 six.raise_from(core._status_to_exception(e.code, message), None)
68 except TypeError as e:
69 keras_symbolic_tensors = [
~/anaconda3/envs/tf2/lib/python3.7/site-packages/six.py in raise_from(value, from_value)
UnimplementedError: Cast string to int32 is not supported
[[node Cast (defined at <ipython-input-19-e60cd6fd68df>:1) ]] [Op:__inference_distributed_function_5170]
Function call stack:
distributed_function
I also tried making a TensorFlow dataset, like so:
train_set_string = tf.data.Dataset.from_tensor_slices(train_set[:, 0, :])
train_set_numeric = tf.data.Dataset.from_tensor_slices(train_set[:, 1, :])
train_set_with_start_of_sequence_token_numeric = tf.data.Dataset.from_tensor_slices(train_set_with_start_of_sequence_token[:, 1, :])
train_set_string = train_set_string.batch(1).prefetch(1)
train_set_numeric = train_set_numeric.batch(1).prefetch(1)
train_set_with_start_of_sequence_token_numeric = train_set_with_start_of_sequence_token_numeric.batch(1).prefetch(1)
history = model.fit([train_set_numeric, train_set_with_start_of_sequence_token_numeric, seq_lengths], train_set_string, epochs=2)
If I try the above code, I get the error:
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-24-4ddb13a87ce5> in <module>
----> 1 history = model.fit([train_set_numeric, train_set_with_start_of_sequence_token_numeric, seq_lengths], train_set_string, epochs=2)
~/anaconda3/envs/tf2/lib/python3.7/site-packages/tensorflow_core/python/keras/engine/training.py in fit(self, x, y, batch_size, epochs, verbose, callbacks, validation_split, validation_data, shuffle, class_weight, sample_weight, initial_epoch, steps_per_epoch, validation_steps, validation_freq, max_queue_size, workers, use_multiprocessing, **kwargs)
817 max_queue_size=max_queue_size,
818 workers=workers,
--> 819 use_multiprocessing=use_multiprocessing)
820
821 def evaluate(self,
~/anaconda3/envs/tf2/lib/python3.7/site-packages/tensorflow_core/python/keras/engine/training_v2.py in fit(self, model, x, y, batch_size, epochs, verbose, callbacks, validation_split, validation_data, shuffle, class_weight, sample_weight, initial_epoch, steps_per_epoch, validation_steps, validation_freq, max_queue_size, workers, use_multiprocessing, **kwargs)
233 max_queue_size=max_queue_size,
234 workers=workers,
--> 235 use_multiprocessing=use_multiprocessing)
236
237 total_samples = _get_total_number_of_samples(training_data_adapter)
~/anaconda3/envs/tf2/lib/python3.7/site-packages/tensorflow_core/python/keras/engine/training_v2.py in _process_training_inputs(model, x, y, batch_size, epochs, sample_weights, class_weights, steps_per_epoch, validation_split, validation_data, validation_steps, shuffle, distribution_strategy, max_queue_size, workers, use_multiprocessing)
531 'at same time.')
532
--> 533 adapter_cls = data_adapter.select_data_adapter(x, y)
534
535 # Handle validation_split, we want to split the data and get the training
~/anaconda3/envs/tf2/lib/python3.7/site-packages/tensorflow_core/python/keras/engine/data_adapter.py in select_data_adapter(x, y)
996 "Failed to find data adapter that can handle "
997 "input: {}, {}".format(
--> 998 _type_name(x), _type_name(y)))
999 elif len(adapter_cls) > 1:
1000 raise RuntimeError(
ValueError: Failed to find data adapter that can handle input: (<class 'list'> containing values of types {"<class 'numpy.ndarray'>", "<class 'tensorflow.python.data.ops.dataset_ops.PrefetchDataset'>"}), <class 'tensorflow.python.data.ops.dataset_ops.PrefetchDataset'>
If I try:
history = model.fit([np.array(train_set_numeric), np.array(train_set_with_start_of_sequence_token_numeric), seq_lengths], np.array(train_set_string), epochs=2)
I get:
---------------------------------------------------------------------------
IndexError Traceback (most recent call last)
<ipython-input-26-7bed963333ee> in <module>
----> 1 history = model.fit([np.array(train_set_numeric), np.array(train_set_with_start_of_sequence_token_numeric), seq_lengths], np.array(train_set_string), epochs=2)
~/anaconda3/envs/tf2/lib/python3.7/site-packages/tensorflow_core/python/keras/engine/training.py in fit(self, x, y, batch_size, epochs, verbose, callbacks, validation_split, validation_data, shuffle, class_weight, sample_weight, initial_epoch, steps_per_epoch, validation_steps, validation_freq, max_queue_size, workers, use_multiprocessing, **kwargs)
817 max_queue_size=max_queue_size,
818 workers=workers,
--> 819 use_multiprocessing=use_multiprocessing)
820
821 def evaluate(self,
~/anaconda3/envs/tf2/lib/python3.7/site-packages/tensorflow_core/python/keras/engine/training_v2.py in fit(self, model, x, y, batch_size, epochs, verbose, callbacks, validation_split, validation_data, shuffle, class_weight, sample_weight, initial_epoch, steps_per_epoch, validation_steps, validation_freq, max_queue_size, workers, use_multiprocessing, **kwargs)
212 steps_per_epoch,
213 ModeKeys.TRAIN,
--> 214 validation_split=validation_split)
215 dist_utils.validate_callbacks(input_callbacks=callbacks,
216 optimizer=model.optimizer)
~/anaconda3/envs/tf2/lib/python3.7/site-packages/tensorflow_core/python/keras/distribute/distributed_training_utils.py in process_batch_and_step_size(strategy, inputs, batch_size, steps_per_epoch, mode, validation_split)
460 first_x_value = nest.flatten(inputs)[0]
461 if isinstance(first_x_value, np.ndarray):
--> 462 num_samples = first_x_value.shape[0]
463 if validation_split and 0. < validation_split < 1.:
464 num_samples = int(num_samples * (1 - validation_split))
IndexError: tuple index out of range
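The only other idea I have is to combine all three inputs and the target into a single tf.data.Dataset instead of passing a mixed list, along these lines (a sketch I haven't verified; I suspect it still wouldn't fix the string-to-int32 cast from the first error, since the underlying arrays still contain strings):
# Sketch: zip the three inputs and the target into one dataset.
dataset = tf.data.Dataset.from_tensor_slices(
    ((train_set[:, 1, :],
      train_set_with_start_of_sequence_token[:, 1, :],
      seq_lengths),
     train_set[:, 0, :]))
dataset = dataset.batch(32).prefetch(1)
# history = model.fit(dataset, epochs=2)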
Can someone tell me what's going on here and how to fix it?