I'm using PyTorch to fine-tune a Hugging Face model with the transformers library. I have torch version '1.13.0+cu117' with Python 3.7.8 and CUDA 11.8. After copying and pasting some of others' code, I'm getting ValueError: The model did not return a loss from the inputs, only the following keys: logits,past_key_values. For reference, the inputs it received are input_ids,token_type_ids,attention_mask.
I've seen this question, but since mine is a text-generation task I couldn't find a dedicated class along the lines of "GPT2 + text generation", so I used AutoModelForCausalLM.
The traceback info is like this:
ValueError Traceback (most recent call last)
~\AppData\Local\Temp\ipykernel_8240\3982389964.py in <module>
47 )
48
---> 49 trainer.train()
c:\users\fchen\appdata\local\programs\python\python37\lib\site-packages\transformers\trainer.py in train(self, resume_from_checkpoint, trial, ignore_keys_for_eval, **kwargs)
1503 resume_from_checkpoint=resume_from_checkpoint,
1504 trial=trial,
-> 1505 ignore_keys_for_eval=ignore_keys_for_eval,
1506 )
1507
c:\users\fchen\appdata\local\programs\python\python37\lib\site-packages\transformers\trainer.py in _inner_training_loop(self, batch_size, args, resume_from_checkpoint, trial, ignore_keys_for_eval)
1747 tr_loss_step = self.training_step(model, inputs)
1748 else:
-> 1749 tr_loss_step = self.training_step(model, inputs)
1750
1751 if (
c:\users\fchen\appdata\local\programs\python\python37\lib\site-packages\transformers\trainer.py in training_step(self, model, inputs)
2506
2507 with self.compute_loss_context_manager():
-> 2508 loss = self.compute_loss(model, inputs)
2509
2510 if self.args.n_gpu > 1:
c:\users\fchen\appdata\local\programs\python\python37\lib\site-packages\transformers\trainer.py in compute_loss(self, model, inputs, return_outputs)
2552 if isinstance(outputs, dict) and "loss" not in outputs:
2553 raise ValueError(
-> 2554 "The model did not return a loss from the inputs, only the following keys: "
2555 f"{','.join(outputs.keys())}. For reference, the inputs it received are {','.join(inputs.keys())}."
2556 )
ValueError: The model did not return a loss from the inputs, only the following keys: logits,past_key_values. For reference, the inputs it received are input_ids,token_type_ids,attention_mask.
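If I read the traceback correctly, compute_loss raises because the model's output dict has no "loss" key, and as far as I know a GPT-2-style causal LM head only computes a loss when a labels tensor is passed to forward. A quick illustrative check (the sample sentence is made up; the checkpoint is the same one I load below):

from transformers import AutoTokenizer, AutoModelForCausalLM

pretrained = "./models/gpt2-chinese-cluecorpussmall/"
tokenizer = AutoTokenizer.from_pretrained(pretrained)
model = AutoModelForCausalLM.from_pretrained(pretrained)

batch = tokenizer("今天天气很好。", return_tensors="pt")
outputs = model(**batch)
print(outputs.keys())  # expect: logits, past_key_values -- no loss without labels

outputs = model(**batch, labels=batch["input_ids"])
print(outputs.loss)    # with labels, forward returns a scalar loss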
The entire code is below. train.csv and test.csv contain only natural-language sentences, one sentence per line.
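Since tokenize_function reads examples["sentences"], I'm assuming each CSV has a single header column named sentences, roughly like this (the rows are made-up examples):

sentences
今天天气很好。
我明天要去图书馆。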
from transformers import AutoTokenizer, AutoModelForCausalLM, Trainer, TrainingArguments
from datasets import load_dataset
import evaluate  # imported but not used below
MAX_LEN=100
# tokenize each sentence, padding/truncating to MAX_LEN tokens
def tokenize_function(examples):
    return tokenizer(examples["sentences"], padding='max_length', truncation=True, max_length=MAX_LEN)
pretrained = "./models/gpt2-chinese-cluecorpussmall/"
tokenizer = AutoTokenizer.from_pretrained(pretrained)
model = AutoModelForCausalLM.from_pretrained(pretrained)
data_files = {"train": "train.csv", "test": "test.csv"}
dataset = load_dataset("csv", data_files=data_files)
tokenized_datasets = dataset.map(tokenize_function, batched=True)
train_dataset = tokenized_datasets["train"].shuffle(seed=42)
eval_dataset = tokenized_datasets["test"].shuffle(seed=42)
training_args = TrainingArguments(
output_dir='./test_trainer',
num_train_epochs=1,
per_device_train_batch_size=2,
per_device_eval_batch_size=2,
learning_rate=5e-05,
warmup_steps=500,
weight_decay=0.01,
logging_dir='./logs',
load_best_model_at_end=True,
logging_steps=400,
save_steps=400,
evaluation_strategy="steps",
report_to=None
)
# NOTE: this second TrainingArguments replaces all the settings configured above
training_args = TrainingArguments(output_dir="test_trainer", evaluation_strategy="epoch")
trainer = Trainer(
model=model,
args=training_args,
train_dataset=train_dataset,
eval_dataset=eval_dataset
)
trainer.train()
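From what I've read, Trainer only computes a loss when the batch also contains a labels key, so my unverified guess is that something like the sketch below is what's missing (either copying input_ids into labels during tokenization, or letting DataCollatorForLanguageModeling build them), but I haven't confirmed this is right for this model:

# Option A (guess, unverified): copy input_ids into a labels column;
# a causal LM shifts the labels internally. Note this would also
# compute loss on the padding tokens.
def tokenize_function(examples):
    out = tokenizer(examples["sentences"], padding='max_length', truncation=True, max_length=MAX_LEN)
    out["labels"] = out["input_ids"].copy()
    return out

# Option B (also unverified): let a collator build labels per batch;
# mlm=False means causal LM, and padding positions get label -100.
from transformers import DataCollatorForLanguageModeling
data_collator = DataCollatorForLanguageModeling(tokenizer=tokenizer, mlm=False)
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=eval_dataset,
    data_collator=data_collator,
)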