I am trying to run hyperparameter optimization on huggingface transformer using wandb.
I tried following the example in the documentation on Hugging Face: https://huggingface.co/docs/transformers/hpo_train but I am getting this weird error when trying to do it:
Traceback (most recent call last):
File "C:\Users\cinra\AppData\Local\miniconda3\envs\SOPenvHyperParameterTuning\lib\threading.py", line 932, in _bootstrap_inner
self.run()
File "C:\Users\cinra\AppData\Local\miniconda3\envs\SOPenvHyperParameterTuning\lib\threading.py", line 870, in run
self._target(*self._args, **self._kwargs)
File "C:\Users\cinra\AppData\Local\miniconda3\envs\SOPenvHyperParameterTuning\lib\site-packages\wandb\agents\pyagent.py", line 303, in _run_job
wandb.finish(exit_code=1)
File "C:\Users\cinra\AppData\Local\miniconda3\envs\SOPenvHyperParameterTuning\lib\site-packages\wandb\sdk\wandb_run.py", line 3716, in finish
wandb.run.finish(exit_code=exit_code, quiet=quiet)
File "C:\Users\cinra\AppData\Local\miniconda3\envs\SOPenvHyperParameterTuning\lib\site-packages\wandb\sdk\wandb_run.py", line 393, in wrapper
return func(self, *args, **kwargs)
File "C:\Users\cinra\AppData\Local\miniconda3\envs\SOPenvHyperParameterTuning\lib\site-packages\wandb\sdk\wandb_run.py", line 334, in wrapper
return func(self, *args, **kwargs)
File "C:\Users\cinra\AppData\Local\miniconda3\envs\SOPenvHyperParameterTuning\lib\site-packages\wandb\sdk\wandb_run.py", line 1883, in finish
return self._finish(exit_code, quiet)
File "C:\Users\cinra\AppData\Local\miniconda3\envs\SOPenvHyperParameterTuning\lib\site-packages\wandb\sdk\wandb_run.py", line 1891, in _finish
tel.feature.finish = True
File "C:\Users\cinra\AppData\Local\miniconda3\envs\SOPenvHyperParameterTuning\lib\site-packages\wandb\sdk\lib\telemetry.py", line 42, in __exit__
self._run._telemetry_callback(self._obj)
File "C:\Users\cinra\AppData\Local\miniconda3\envs\SOPenvHyperParameterTuning\lib\site-packages\wandb\sdk\wandb_run.py", line 716, in _telemetry_callback
self._telemetry_flush()
File "C:\Users\cinra\AppData\Local\miniconda3\envs\SOPenvHyperParameterTuning\lib\site-packages\wandb\sdk\wandb_run.py", line 727, in _telemetry_flush
self._backend.interface._publish_telemetry(self._telemetry_obj)
File "C:\Users\cinra\AppData\Local\miniconda3\envs\SOPenvHyperParameterTuning\lib\site-packages\wandb\sdk\interface\interface_shared.py", line 101, in _publish_telemetry
self._publish(rec)
File "C:\Users\cinra\AppData\Local\miniconda3\envs\SOPenvHyperParameterTuning\lib\site-packages\wandb\sdk\interface\interface_sock.py", line 51, in _publish
self._sock_client.send_record_publish(record)
File "C:\Users\cinra\AppData\Local\miniconda3\envs\SOPenvHyperParameterTuning\lib\site-packages\wandb\sdk\lib\sock_client.py", line 221, in send_record_publish
self.send_server_request(server_req)
File "C:\Users\cinra\AppData\Local\miniconda3\envs\SOPenvHyperParameterTuning\lib\site-packages\wandb\sdk\lib\sock_client.py", line 155, in send_server_request
self._send_message(msg)
File "C:\Users\cinra\AppData\Local\miniconda3\envs\SOPenvHyperParameterTuning\lib\site-packages\wandb\sdk\lib\sock_client.py", line 152, in _send_message
self._sendall_with_error_handle(header + data)
File "C:\Users\cinra\AppData\Local\miniconda3\envs\SOPenvHyperParameterTuning\lib\site-packages\wandb\sdk\lib\sock_client.py", line 130, in _sendall_with_error_handle
sent = self._sock.send(data)
ConnectionResetError: [WinError 10054] An existing connection was forcibly closed by the remote host
Here is my code in case anyone wants to help:
import pandas as pd
from datasets import load_dataset
from datasets import Dataset
import wandb
import os
# --- Weights & Biases configuration (via environment variables) ---
os.environ["WANDB_PROJECT"]="confidencepredictor"  # target W&B project
os.environ["WANDB_LOG_MODEL"]="true"               # upload checkpoints as W&B artifacts
os.environ["WANDB_WATCH"]="false"                  # disable gradient/parameter watching
save_dir = "train15"  # run name and output sub-directory
# NOTE(review): wandb.init() starts a run up front, but the "wandb" backend of
# Trainer.hyperparameter_search spawns a sweep agent that manages its own runs;
# an already-active run can conflict with the agent and is a plausible cause of
# the ConnectionResetError in the traceback above — TODO confirm against the
# W&B sweeps documentation.
wandb.init(project="confidencepredictor",
name=f"{save_dir}")
validation_size_ratio = 0.1  # fraction of the train split held out for validation
test_size_ratio = 0.05       # fraction of the full data held out as the test split
BASE_MODEL = "distilbert-base-uncased"
# Load the raw data; assumes a ';'-separated CSV containing at least the
# ServiceOrderText and LPF_area columns — TODO confirm schema.
df = pd.read_csv("Data/Area_LPF_1000_processed.csv", sep=";")
Num_data = len(df)
df = df[["ServiceOrderText","LPF_area"]]
df.columns=["text","label"]  # rename to the column names the HF Trainer expects
print(f"Num_data = {Num_data}, Validation_size_ratio = {validation_size_ratio}, Test_size_ratio = {test_size_ratio}, Base Model = {BASE_MODEL}")
# Split off the test set first, then carve a validation set out of the
# remaining training data; both splits are seeded for reproducibility.
dataset = Dataset.from_pandas(df).select(range(1000)).shuffle(seed=42).train_test_split(test_size=test_size_ratio,seed=42)
temp_dataset = dataset["train"].train_test_split(test_size=validation_size_ratio,seed=42)
dataset["train"] = temp_dataset["train"]
dataset["validation"] = temp_dataset["test"]
from transformers import AutoTokenizer
# Tokenizer matching the base checkpoint (downloaded from the HF hub on first use).
tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL)
def tokenize_function(examples):
    """Tokenize a batch of examples, padding/truncating every text to 250 tokens."""
    return tokenizer(
        examples["text"],
        padding="max_length",
        truncation=True,
        max_length=250,
    )
# Tokenize every split; batched=True lets the fast tokenizer process many rows per call.
tokenized_datasets = dataset.map(tokenize_function, batched=True)
# Seeded shuffles of each split. NOTE(review): despite the "small_" prefix the
# full splits are used — no sub-sampling (e.g. .select(...)) happens here.
small_train_dataset = tokenized_datasets["train"].shuffle(seed=42)
small_eval_dataset = tokenized_datasets["validation"].shuffle(seed=42)
small_test_dataset = tokenized_datasets["test"].shuffle(seed=42)
from transformers import AutoModelForSequenceClassification
# num_labels=1 turns the classification head into a single-output regression head.
model = AutoModelForSequenceClassification.from_pretrained(BASE_MODEL, num_labels=1)
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import mean_squared_error
from sklearn.metrics import r2_score
def compute_metrics_for_regression(eval_pred):
    """Compute regression metrics plus a rounding-based accuracy.

    Args:
        eval_pred: (predictions, labels) pair supplied by the Trainer;
            predictions have shape (n, 1), labels (n,).

    Returns:
        dict with "mse", "mae", "r2" and "accuracy", where accuracy is the
        fraction of predictions whose squared error is below 0.25 —
        equivalently |error| < 0.5, i.e. the prediction rounds to the
        true integer score.
    """
    logits, labels = eval_pred
    labels = labels.reshape(-1, 1)  # align shapes with the (n, 1) predictions
    mse = mean_squared_error(labels, logits)
    mae = mean_absolute_error(labels, logits)
    r2 = r2_score(labels, logits)
    squared_errors = ((logits - labels).flatten() ** 2).tolist()
    # Squared errors are non-negative, so the original abs() was redundant;
    # also guard against an empty batch to avoid ZeroDivisionError.
    if squared_errors:
        accuracy = sum(1 for e in squared_errors if e < 0.25) / len(squared_errors)
    else:
        accuracy = 0.0
    return {"mse": mse, "mae": mae, "r2": r2, "accuracy": accuracy}
from transformers import TrainingArguments, Trainer
training_args = TrainingArguments(
    output_dir=f"results/{save_dir}",
    learning_rate=5e-7,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=16,
    num_train_epochs=20,
    evaluation_strategy="steps",
    save_strategy="steps",
    save_steps=500,          # must be a multiple of eval_steps (500 = 25 * 20)
    save_total_limit=5,
    metric_for_best_model="mae",
    # BUG FIX: MAE must be minimized. Without greater_is_better=False,
    # transformers defaults to True for any metric whose name does not end
    # in "loss", so the WORST checkpoint (highest MAE) would be restored as
    # "best" by load_best_model_at_end.
    greater_is_better=False,
    load_best_model_at_end=True,
    eval_steps=20,           # evaluate every 20 optimization steps
    weight_decay=0.001,
    report_to="wandb",       # stream logs and metrics to Weights & Biases
)
import torch
class RegressionTrainer(Trainer):
    """Trainer variant that optimizes Huber loss on a single regression output."""

    def compute_loss(self, model, inputs, return_outputs=False, **kwargs):
        """Return the Huber loss between the model's scalar output and the labels.

        ROBUSTNESS FIX: accepts **kwargs because newer transformers versions
        pass an extra num_items_in_batch argument to compute_loss; the rigid
        original signature would raise a TypeError there.
        """
        labels = inputs.pop("labels")
        outputs = model(**inputs)
        # First output is the logits tensor of shape (batch, 1); take the
        # single column so it matches the labels' 1-D shape.
        logits = outputs[0][:, 0]
        loss = torch.nn.functional.huber_loss(logits, labels)
        return (loss, outputs) if return_outputs else loss
def model_init(trial):
    """Build a fresh model for each hyperparameter-search trial.

    BUG FIX: the original omitted num_labels=1, so every trial got a default
    2-label classification head instead of the single-output regression head
    this script uses everywhere else. Also reuses BASE_MODEL instead of
    hard-coding the checkpoint name.
    """
    return AutoModelForSequenceClassification.from_pretrained(
        BASE_MODEL, num_labels=1, return_dict=True
    )
# Trainer for the hyperparameter search: model=None combined with model_init
# makes the Trainer instantiate a fresh model at the start of every trial.
trainer = RegressionTrainer(
    model=None,
    args=training_args,
    train_dataset=small_train_dataset,
    eval_dataset=small_eval_dataset,
    compute_metrics=compute_metrics_for_regression,
    model_init=model_init
)
# trainer.train()
def wandb_hp_space(trial):
    """Return the W&B sweep configuration: random search minimizing the objective."""
    search_space = {
        "learning_rate": {"distribution": "uniform", "min": 1e-6, "max": 1e-4},
        "per_device_train_batch_size": {"values": [4, 8, 16]},
    }
    sweep_config = {"method": "random"}
    sweep_config["metric"] = {"name": "objective", "goal": "minimize"}
    sweep_config["parameters"] = search_space
    return sweep_config
# BUG FIX: the sweep metric's goal in wandb_hp_space is "minimize", but the
# search direction was "maximize". The default compute_objective is the
# evaluation loss, which must be minimized — the two settings have to agree.
best_trial = trainer.hyperparameter_search(
    direction="minimize",
    backend="wandb",
    hp_space=wandb_hp_space,
    n_trials=3
)