0

I am trying to run hyperparameter optimization on huggingface transformer using wandb.

I tried following the example in documentation in hugginface: https://huggingface.co/docs/transformers/hpo_train I am getting this weird error when trying to do this:

Traceback (most recent call last):
  File "C:\Users\cinra\AppData\Local\miniconda3\envs\SOPenvHyperParameterTuning\lib\threading.py", line 932, in _bootstrap_inner
    self.run()
  File "C:\Users\cinra\AppData\Local\miniconda3\envs\SOPenvHyperParameterTuning\lib\threading.py", line 870, in run
    self._target(*self._args, **self._kwargs)
  File "C:\Users\cinra\AppData\Local\miniconda3\envs\SOPenvHyperParameterTuning\lib\site-packages\wandb\agents\pyagent.py", line 303, in _run_job
    wandb.finish(exit_code=1)
  File "C:\Users\cinra\AppData\Local\miniconda3\envs\SOPenvHyperParameterTuning\lib\site-packages\wandb\sdk\wandb_run.py", line 3716, in finish
    wandb.run.finish(exit_code=exit_code, quiet=quiet)
  File "C:\Users\cinra\AppData\Local\miniconda3\envs\SOPenvHyperParameterTuning\lib\site-packages\wandb\sdk\wandb_run.py", line 393, in wrapper
    return func(self, *args, **kwargs)
  File "C:\Users\cinra\AppData\Local\miniconda3\envs\SOPenvHyperParameterTuning\lib\site-packages\wandb\sdk\wandb_run.py", line 334, in wrapper
    return func(self, *args, **kwargs)
  File "C:\Users\cinra\AppData\Local\miniconda3\envs\SOPenvHyperParameterTuning\lib\site-packages\wandb\sdk\wandb_run.py", line 1883, in finish
    return self._finish(exit_code, quiet)
  File "C:\Users\cinra\AppData\Local\miniconda3\envs\SOPenvHyperParameterTuning\lib\site-packages\wandb\sdk\wandb_run.py", line 1891, in _finish
    tel.feature.finish = True
  File "C:\Users\cinra\AppData\Local\miniconda3\envs\SOPenvHyperParameterTuning\lib\site-packages\wandb\sdk\lib\telemetry.py", line 42, in __exit__
    self._run._telemetry_callback(self._obj)
  File "C:\Users\cinra\AppData\Local\miniconda3\envs\SOPenvHyperParameterTuning\lib\site-packages\wandb\sdk\wandb_run.py", line 716, in _telemetry_callback
    self._telemetry_flush()
  File "C:\Users\cinra\AppData\Local\miniconda3\envs\SOPenvHyperParameterTuning\lib\site-packages\wandb\sdk\wandb_run.py", line 727, in _telemetry_flush
    self._backend.interface._publish_telemetry(self._telemetry_obj)
  File "C:\Users\cinra\AppData\Local\miniconda3\envs\SOPenvHyperParameterTuning\lib\site-packages\wandb\sdk\interface\interface_shared.py", line 101, in _publish_telemetry
    self._publish(rec)
  File "C:\Users\cinra\AppData\Local\miniconda3\envs\SOPenvHyperParameterTuning\lib\site-packages\wandb\sdk\interface\interface_sock.py", line 51, in _publish
    self._sock_client.send_record_publish(record)
  File "C:\Users\cinra\AppData\Local\miniconda3\envs\SOPenvHyperParameterTuning\lib\site-packages\wandb\sdk\lib\sock_client.py", line 221, in send_record_publish
    self.send_server_request(server_req)
  File "C:\Users\cinra\AppData\Local\miniconda3\envs\SOPenvHyperParameterTuning\lib\site-packages\wandb\sdk\lib\sock_client.py", line 155, in send_server_request
    self._send_message(msg)
  File "C:\Users\cinra\AppData\Local\miniconda3\envs\SOPenvHyperParameterTuning\lib\site-packages\wandb\sdk\lib\sock_client.py", line 152, in _send_message
    self._sendall_with_error_handle(header + data)
  File "C:\Users\cinra\AppData\Local\miniconda3\envs\SOPenvHyperParameterTuning\lib\site-packages\wandb\sdk\lib\sock_client.py", line 130, in _sendall_with_error_handle
    sent = self._sock.send(data)
ConnectionResetError: [WinError 10054] An existing connection was forcibly closed by the remote host

Here is my code in case anyone wants to help:

import pandas as pd
from datasets import load_dataset
from datasets import Dataset
import wandb
import os



# wandb
os.environ["WANDB_PROJECT"]="confidencepredictor"
os.environ["WANDB_LOG_MODEL"]="true"
os.environ["WANDB_WATCH"]="false"


save_dir = "train15"

wandb.init(project="confidencepredictor",
           name=f"{save_dir}")


validation_size_ratio = 0.1
test_size_ratio = 0.05
BASE_MODEL = "distilbert-base-uncased"

df = pd.read_csv("Data/Area_LPF_1000_processed.csv", sep=";")

Num_data = len(df)

df = df[["ServiceOrderText","LPF_area"]]

df.columns=["text","label"]

print(f"Num_data = {Num_data}, Validation_size_ratio = {validation_size_ratio}, Test_size_ratio = {test_size_ratio}, Base Model = {BASE_MODEL}")




dataset  = Dataset.from_pandas(df).select(range(1000)).shuffle(seed=42).train_test_split(test_size=test_size_ratio,seed=42)
temp_dataset = dataset["train"].train_test_split(test_size=validation_size_ratio,seed=42)

dataset["train"] = temp_dataset["train"]
dataset["validation"] = temp_dataset["test"]


from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL)


def tokenize_function(examples):
    # label = examples["label"]

    examples=tokenizer(examples["text"], padding="max_length", truncation=True,max_length=250)
    # examples["label"] = float(label)

    return examples


tokenized_datasets = dataset.map(tokenize_function, batched=True)


small_train_dataset = tokenized_datasets["train"].shuffle(seed=42)
small_eval_dataset = tokenized_datasets["validation"].shuffle(seed=42)
small_test_dataset = tokenized_datasets["test"].shuffle(seed=42)

from transformers import AutoModelForSequenceClassification

model = AutoModelForSequenceClassification.from_pretrained(BASE_MODEL, num_labels=1)
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import mean_squared_error
from sklearn.metrics import r2_score

def compute_metrics_for_regression(eval_pred):
    logits, labels = eval_pred
    labels = labels.reshape(-1, 1)

    mse = mean_squared_error(labels, logits)
    mae = mean_absolute_error(labels, logits)
    r2 = r2_score(labels, logits)
    single_squared_errors = ((logits - labels).flatten()**2).tolist()

    # Compute accuracy
    # Based on the fact that the rounded score = true score only if |single_squared_errors| < 0.5
    accuracy = sum([1 for e in single_squared_errors if abs(e) < 0.25]) / len(single_squared_errors)

    return {"mse": mse, "mae": mae, "r2": r2, "accuracy": accuracy}



from transformers import TrainingArguments, Trainer

training_args = TrainingArguments(
    output_dir=f"results/{save_dir}",
    learning_rate=5e-7,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=16,
    num_train_epochs=20,
    evaluation_strategy="steps",
    save_strategy="steps",
    save_steps=500,
    save_total_limit=5,
    metric_for_best_model="mae",
    load_best_model_at_end=True,
    eval_steps=20,
    weight_decay=0.001,
   report_to="wandb"
)

import torch

class RegressionTrainer(Trainer):
    def compute_loss(self, model, inputs, return_outputs=False):
        labels = inputs.pop("labels")
        outputs = model(**inputs)
        logits = outputs[0][:, 0]
        loss = torch.nn.functional.huber_loss(logits, labels)
        return (loss, outputs) if return_outputs else loss

def model_init(trial):
    return AutoModelForSequenceClassification.from_pretrained(
        'distilbert-base-uncased', return_dict=True)


trainer = RegressionTrainer(
    model=None,
    args=training_args,
    train_dataset=small_train_dataset,
    eval_dataset=small_eval_dataset,
    compute_metrics=compute_metrics_for_regression,
    model_init=model_init
)


# trainer.train()


def wandb_hp_space(trial):
    return {
        "method": "random",
        "metric": {"name": "objective", "goal": "minimize"},
        "parameters": {
            "learning_rate": {"distribution": "uniform", "min": 1e-6, "max": 1e-4},
            "per_device_train_batch_size": {"values": [4,8,16]},
        },
    }

best_trial = trainer.hyperparameter_search(
    direction="maximize",
    backend="wandb",
    hp_space=wandb_hp_space,
    n_trials=3
)
Tyler2P
  • 2,324
  • 26
  • 22
  • 31

0 Answers0