I tried to fine tune a model with my personal information. So I can create a chat box where people can learn about me via chat gpt.
However, I got the error of
RuntimeError: stack expects each tensor to be equal size, but got [47] at entry 0 and [36] at entry 1
Because I have different length of input
Here are 2 of my sample input
What is the webisite of ABC company ? -> https://abcdef.org/
Do you know the website of ABC company ? -> It is https://abcdef.org/
Here is what I have tried so far
import torch
from transformers import GPT2Tokenizer, GPT2LMHeadModel
from torch.utils.data import Dataset, DataLoader
class QADataset(Dataset):
def __init__(self, questions, answers, tokenizer, max_length):
self.questions = questions
self.answers = answers
self.tokenizer = tokenizer
self.max_length = max_length
# Add a padding token to the tokenizer
self.tokenizer.add_special_tokens({'pad_token': '[PAD]'})
def __len__(self):
return len(self.questions)
def __getitem__(self, index):
question = self.questions[index]
answer = self.answers[index]
input_text = f"Q: {question} A: {answer}"
input_ids = self.tokenizer.encode(input_text, add_special_tokens=True, max_length=self.max_length, padding=True, truncation=True)
if input_ids is None:
return None
input_ids = torch.tensor(input_ids, dtype=torch.long)
print(f"Input ids size: {input_ids.size()}")
return input_ids
# Set up the tokenizer and model
tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
model = GPT2LMHeadModel.from_pretrained('gpt2')
# Load the question and answer data
questions = ["What is the webisite of ABC company ?", "Do you know the website of ABC company ?"]
answers = ["https://abcdef.org/", "It is https://abcdef.org/"]
# Create the dataset and data loader
max_length = 64
dataset = QADataset(questions, answers, tokenizer, max_length=max_length)
data_loader = DataLoader(dataset, batch_size=8, shuffle=True)
# Fine-tune the model on the QA dataset
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model.to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=1e-5)
criterion = torch.nn.CrossEntropyLoss()
for epoch in range(3):
running_loss = 0.0
for batch in data_loader:
batch = batch.to(device)
outputs = model(batch, labels=batch)
loss, _ = outputs[:2]
optimizer.zero_grad()
loss.backward()
optimizer.step()
running_loss += loss.item()
print(f"Epoch {epoch + 1} loss: {running_loss / len(data_loader)}")
# Save the fine-tuned model
model.save_pretrained("qa_finetuned_gpt2")
I dont have a solid background of AI, it is more like reading references and try to implement it.