I am working with Hugging Face transformers, fine-tuning a pretrained byt5-small on my data. I am also trying to do a hyperparameter search using the Trainer API with optuna as the backend. But the following error appears every time. Please help me out. Below is the whole code.
transformers version = 4.23.1
from transformers import HfArgumentParser, TensorFlowBenchmark, TensorFlowBenchmarkArguments
import pandas as pd
from transformers import T5ForConditionalGeneration, ByT5Tokenizer
from transformers import TrainingArguments
from torch.utils.data import Dataset, DataLoader
from transformers import Trainer
import datasets
import transformers
from tqdm import tqdm
from numba import cuda
device = cuda.get_current_device()
device.reset()
train_df = pd.read_csv("/home/bhavuk/project1/data/train_split.csv")
eval_df = pd.read_csv("/home/bhavuk/project1/data/eval_split.csv")
test_df = pd.read_csv("/home/bhavuk/project1/data/test_split.csv")
train_df = train_df.dropna()
eval_df = eval_df.dropna()
test_df = test_df.dropna(subset=["Hypothesis","Reference"])
train_df.shape, eval_df.shape[0], test_df.shape[0]
args_dict = {
    "output_dir": './byt5-small-hp-search',
    "overwrite_output_dir": True,
    "per_device_train_batch_size": 2,
    "per_device_eval_batch_size": 2,
    "gradient_accumulation_steps": 4,
    "learning_rate": 1e-1,
    "warmup_steps": 2,
    "logging_steps": 100,
    "evaluation_strategy": "steps",
    "eval_steps": 250,
    "num_train_epochs": 4,
    "do_train": True,
    "do_eval": True,
    "fp16": False,
    "max_steps": 100000,
    "load_best_model_at_end": True,
    "logging_dir": './logs',
    "save_total_limit": 2,
    "weight_decay": 0.1,
    "label_smoothing_factor": 0.1
}
parser = HfArgumentParser(TrainingArguments)
training_args = parser.parse_dict(args_dict)
args = training_args[0]  # parse_dict returns a tuple of dataclasses
def optuna_hp_space(trial):
    return {
        "learning_rate": trial.suggest_float("learning_rate", 1e-5, 1e-1, log=True),
        "dropout_rate": trial.suggest_float("dropout_rate", 0.1, 0.6, step=0.1),
        "weight_decay": trial.suggest_float("weight_decay", 0.1, 0.3, step=0.1),
        "label_smoothing_factor": trial.suggest_float("label_smoothing_factor", 0.1, 0.3, step=0.1)
    }
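One thing I am not sure about: from reading Trainer._hp_search_setup, the values returned by hp_space are set onto TrainingArguments, and dropout_rate is not a TrainingArguments field, so it probably never reaches the model. A sketch of what I might do instead, assuming model_init really does receive the optuna trial and trial.params is already populated by then:

def model_init(trial):
    # sketch, not my current code: pull the suggested dropout_rate off the
    # optuna trial; trial is None on the first (non-search) model build
    dropout = trial.params.get("dropout_rate", 0.1) if trial is not None else 0.1
    return T5ForConditionalGeneration.from_pretrained(
        "google/byt5-small",
        dropout_rate=dropout
    )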
config = '/home/bhavuk/project1/notebooks/models--google--byt5-small/snapshots/ce8f3a48ed7676af36476a01fb01f95ea529599c/config.json'
def model_init(trial):
    return T5ForConditionalGeneration.from_pretrained(
        'google/byt5-small',
        config=config,
        dropout_rate=0.1
    )
tokenizer = ByT5Tokenizer.from_pretrained(
    "google/byt5-small",
    cache_dir=".",
    max_length=512
)
class GPReviewDataset(Dataset):
    def __init__(self, Text, Label):
        self.Text = Text
        self.Label = Label
        # self.tokenizer = tokenizer
        # self.max_len = max_len

    def __len__(self):
        return len(self.Text)

    def __getitem__(self, item):
        Text = str(self.Text[item])
        Label = self.Label[item]
        inputs = tokenizer(Text, padding="max_length", truncation=True, max_length=512)
        outputs = tokenizer(Label, padding="max_length", truncation=True, max_length=512)
        return {
            "input_ids": inputs.input_ids,
            "attention_mask": inputs.attention_mask,
            "labels": outputs.input_ids,
            "decoder_attention_mask": outputs.attention_mask,
            # "labels": lbz
        }
ds_train = GPReviewDataset(
    Text=train_df.Hypothesis.to_numpy(),
    Label=train_df.Reference.to_numpy()
)
ds_test = GPReviewDataset(
    Text=eval_df.Hypothesis.to_numpy(),
    Label=eval_df.Reference.to_numpy()
    # tokenizer=tokenizer,
    # max_len=max_len
)
train_dataset = ds_train
valid_dataset = ds_test
trainer = Trainer(
    model=None,
    args=args,
    train_dataset=train_dataset,
    eval_dataset=valid_dataset,
    tokenizer=tokenizer,
    model_init=model_init
)
best_trial = trainer.hyperparameter_search(
    direction="minimize",
    backend="optuna",
    hp_space=optuna_hp_space,
    n_trials=20
)
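Once the search finishes, I plan to inspect the result like this (my understanding of the BestRun fields in 4.23):

print(best_trial.run_id)           # id of the winning optuna trial
print(best_trial.objective)        # best objective value (eval loss here)
print(best_trial.hyperparameters)  # dict of the chosen hp_space values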
ERROR:
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
/home/bhavuk/project1/notebooks/byT5small_hp_search_2.ipynb Cell 14 in <cell line: 1>()
----> 1 best_trial = trainer.hyperparameter_search(
2 direction="minimize",
3 backend="optuna",
4 hp_space=optuna_hp_space,
5 n_trials=20
6 )
File ~/anaconda3/envs/cvenv/lib/python3.9/site-packages/transformers/trainer.py:2368, in Trainer.hyperparameter_search(self, hp_space, compute_objective, n_trials, direction, backend, hp_name, **kwargs)
2360 self.compute_objective = default_compute_objective if compute_objective is None else compute_objective
2362 backend_dict = {
2363 HPSearchBackend.OPTUNA: run_hp_search_optuna,
2364 HPSearchBackend.RAY: run_hp_search_ray,
2365 HPSearchBackend.SIGOPT: run_hp_search_sigopt,
2366 HPSearchBackend.WANDB: run_hp_search_wandb,
2367 }
-> 2368 best_run = backend_dict[backend](self, n_trials, direction, **kwargs)
2370 self.hp_search_backend = None
2371 return best_run
File ~/anaconda3/envs/cvenv/lib/python3.9/site-packages/transformers/integrations.py:189, in run_hp_search_optuna(trainer, n_trials, direction, **kwargs)
187 n_jobs = kwargs.pop("n_jobs", 1)
188 study = optuna.create_study(direction=direction, **kwargs)
...
return forward_call(*input, **kwargs)
File "/home/bhavuk/anaconda3/envs/cvenv/lib/python3.9/site-packages/transformers/models/t5/modeling_t5.py", line 937, in forward
raise ValueError(f"You have to specify either {err_msg_prefix}input_ids or {err_msg_prefix}inputs_embeds")
ValueError: You have to specify either decoder_input_ids or decoder_inputs_embeds
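In case it narrows things down: my current suspicion, from reading trainer.py, is that this comes from label_smoothing_factor. When it is non-zero, Trainer.compute_loss pops "labels" out of the inputs and computes the smoothed loss itself, so T5's forward never sees labels and cannot derive decoder_input_ids from them, which would explain the ValueError. A sketch of the workaround I am considering, assuming byt5-small's decoder_start_token_id and pad_token_id are both 0, is to build decoder_input_ids in __getitem__ myself:

# sketch: shift the label ids right by one so decoder_input_ids is always
# present, even when the Trainer pops "labels" for label smoothing
labels = outputs.input_ids
decoder_input_ids = [0] + labels[:-1]  # 0 = decoder_start_token_id for byt5
return {
    "input_ids": inputs.input_ids,
    "attention_mask": inputs.attention_mask,
    "labels": labels,
    "decoder_input_ids": decoder_input_ids,
    "decoder_attention_mask": outputs.attention_mask,
}

The simpler sanity check would be to drop label_smoothing_factor from args_dict and optuna_hp_space and see whether the search runs.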