I am trying to train a GPT-2 model on Wikipedia text. While doing so, I get the following error:

ValueError: Unexpected result of `train_function` (Empty logs). Please use 
`Model.compile(..., run_eagerly=True)`, or `tf.config.run_functions_eagerly(True)` for more information of where went wrong, or file a issue/bug to `tf.keras`.

The error happens when my code calls history = model.fit(dataset, epochs=num_epoch). Note that I already compile the model with run_eagerly=True (see the compile call below), as the message suggests.
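
For reference, the other suggestion in the message is the global eager switch, which as far as I know has to be set before compiling; a minimal sketch:

import tensorflow as tf

# Run all tf.functions eagerly so the underlying exception (if any)
# surfaces with a full Python traceback instead of the generic
# "Empty logs" error.
tf.config.run_functions_eagerly(True)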

My code is below:

from tokenise import BPE_token
from pathlib import Path
import tensorflow as tf
from transformers import GPT2Config, TFGPT2LMHeadModel, GPT2Tokenizer
import os

# the folder 'text' contains all the files
paths = [str(x) for x in Path("./text/").glob("**/*.txt")]
tokenizer = BPE_token()

# train the tokenizer model
tokenizer.bpe_train(paths)

# saving the tokenized data in our specified folder 
save_path = 'tokenized_data'
tokenizer.save_tokenizer(save_path)

# loading tokenizer from the saved model path
tokenizer = GPT2Tokenizer.from_pretrained(save_path)
tokenizer.add_special_tokens({
  "eos_token": "</s>",
  "bos_token": "<s>",
  "unk_token": "<unk>",
  "pad_token": "<pad>",
  "mask_token": "<mask>"
})

# creating the configurations from which the model can be made
config = GPT2Config(
  vocab_size=tokenizer.vocab_size,
  bos_token_id=tokenizer.bos_token_id,
  eos_token_id=tokenizer.eos_token_id
)
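
# NOTE (sketch, not verified): as far as I know, tokenizer.vocab_size counts
# only the base BPE vocabulary and excludes the special tokens added above,
# while len(tokenizer) includes them, so the config above may be built with a
# vocab size that is too small. Quick check:
print(tokenizer.vocab_size, len(tokenizer))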

# creating the model
model = TFGPT2LMHeadModel(config)

# We also create a single string from all our documents and tokenize it.

single_string = ''
for filename in paths:
  with open(filename, "r", encoding='utf-8') as f:
    x = f.read()
  single_string += x + tokenizer.eos_token
string_tokenized = tokenizer.encode(single_string)

examples = []
block_size = 100
BATCH_SIZE = 12
BUFFER_SIZE = 1000

for i in range(0, len(string_tokenized) - block_size + 1, block_size):
  examples.append(string_tokenized[i:i + block_size])

inputs, labels = [], []
for ex in examples:
  inputs.append(ex[:-1])
  labels.append(ex[1:])

dataset = tf.data.Dataset.from_tensor_slices((inputs, labels))
dataset = dataset.shuffle(BUFFER_SIZE).batch(BATCH_SIZE, drop_remainder=True)
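
# Sanity check (sketch): with block_size = 100, each example is 99 tokens
# after the input/label shift, so each batch should be an (inputs, labels)
# pair of shape (12, 99).
print(dataset.element_spec)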

# Model Training

# defining our optimizer
optimizer = tf.keras.optimizers.Adam(learning_rate=3e-5, epsilon=1e-08, clipnorm=1.0)

# defining our loss function
loss = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)

# defining our metric which we want to observe
metric = tf.keras.metrics.SparseCategoricalAccuracy('accuracy')

# compiling the model
model.compile(optimizer=optimizer, loss=[loss, *[None] * model.config.n_layer],
  metrics=[metric], run_eagerly=True)
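
# My understanding of the loss list above: TFGPT2LMHeadModel returns the LM
# logits followed by n_layer past key/value tensors, and the None entries
# tell Keras to skip loss computation for those extra outputs. If a newer
# transformers version packages the outputs differently, a plain single-loss
# compile might be needed instead (untested sketch):
# model.compile(optimizer=optimizer, loss=loss, metrics=[metric], run_eagerly=True)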

# Now, let’s train the model
num_epoch = 10
history = model.fit(dataset, epochs=num_epoch)
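
If it helps with debugging, here is a sketch of a single-batch forward pass outside of fit (untested; outputs[0] should be the logits):

# Pull one batch and run it through the model directly
for batch_inputs, batch_labels in dataset.take(1):
  outputs = model(batch_inputs)
  print(outputs[0].shape)  # expected: (BATCH_SIZE, block_size - 1, vocab_size)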