I am new to pytorch. I'm fine tuning a Bert model to do a text binary-classification. Once I run my code, I get this following error :
Expected input batch_size (1) to match target batch_size (64).
Here's my model structure :
from torch import nn
# Get cpu or gpu device for training.
device = "cuda" if torch.cuda.is_available() else "cpu"
#device = "cpu"
print(f"Using {device} device")
class DistilBertClassification(nn.Module):
def __init__(self):
super(DistilBertClassification, self).__init__()
self.dbert = dbert_pt
self.dropout = nn.Dropout(p=0.1)
self.linear1 = nn.Linear(768,64)
self.ReLu = nn.ReLU()
self.linear2 = nn.Linear(64,2)
def forward(self, x):
#print('----forward checkpoint 1----')
x = self.dbert(input_ids=x[0])
x = x["last_hidden_state"][:,0,:]
x = self.dropout(x)
x = self.linear1(x)
x = self.ReLu(x)
logits = self.linear2(x)
return logits
model_pt = DistilBertClassification().to(device)
and here's the part of the code that gives the error :
it's when I try to compute the loss using loss = criterion(prediction , y.cuda())
from tqdm import tqdm
# pour chaque epochs
for e in range(epochs):
model_pt.train()
train_loss = 0.0
train_accuracy = []
# Loop on batches
for X, y in tqdm(train_loader_pt):
# Get prediction & loss
prediction = model_pt(X.cuda())
loss = criterion(prediction , y.cuda())
# Adjust the parameters of the model
optimizer.zero_grad()
loss.backward()
optimizer.step()
train_loss += loss.item()
prediction_index = prediction.argmax(axis=1)
accuracy = (prediction_index==y.cuda())
train_accuracy += accuracy
train_accuracy = (sum(train_accuracy) / len(train_accuracy)).item()
the shape of prediction is : torch.Size([1, 2])
while the shape of y is : torch.Size([64])