
I'm trying to implement sentiment analysis (positive or negative labels) using BERT, and I want to add a BiLSTM layer on top to see if I can increase the accuracy of the pretrained model from HuggingFace. I have the code below and a few questions:

import numpy as np
import pandas as pd
from sklearn import metrics
import transformers
import torch
from torch.utils.data import Dataset, DataLoader, RandomSampler, SequentialSampler
from transformers import BertTokenizer, BertModel, BertConfig
from torch import cuda
import re
import torch.nn as nn

device = 'cuda' if cuda.is_available() else 'cpu'
MAX_LEN = 200
TRAIN_BATCH_SIZE = 8
VALID_BATCH_SIZE = 4
EPOCHS = 1
LEARNING_RATE = 1e-05 #5e-5, 3e-5 or 2e-5
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')

class CustomDataset(Dataset):
    def __init__(self, dataframe, tokenizer, max_len):
        self.tokenizer = tokenizer
        self.data = dataframe
        self.comment_text = dataframe.review
        self.targets = self.data.sentiment
        self.max_len = max_len

    def __len__(self):
        return len(self.comment_text)

    def __getitem__(self, index):
        comment_text = str(self.comment_text[index])
        comment_text = " ".join(comment_text.split())

        inputs = self.tokenizer.encode_plus(
            comment_text,
            None,
            add_special_tokens=True,
            max_length=self.max_len,
            pad_to_max_length=True,
            return_token_type_ids=True
        )
        ids = inputs['input_ids']
        mask = inputs['attention_mask']
        token_type_ids = inputs["token_type_ids"]

        return {
            'ids': torch.tensor(ids, dtype=torch.long),
            'mask': torch.tensor(mask, dtype=torch.long),
            'token_type_ids': torch.tensor(token_type_ids, dtype=torch.long),
            'targets': torch.tensor(self.targets[index], dtype=torch.float)
        }
# df is assumed to be loaded beforehand, with a 'review' text column and a
# binary 'sentiment' (0/1) column
train_size = 0.8
train_dataset = df.sample(frac=train_size, random_state=200)
test_dataset = df.drop(train_dataset.index).reset_index(drop=True)
train_dataset = train_dataset.reset_index(drop=True)

print("FULL Dataset: {}".format(df.shape))
print("TRAIN Dataset: {}".format(train_dataset.shape))
print("TEST Dataset: {}".format(test_dataset.shape))

training_set = CustomDataset(train_dataset, tokenizer, MAX_LEN)
testing_set = CustomDataset(test_dataset, tokenizer, MAX_LEN)
train_params = {'batch_size': TRAIN_BATCH_SIZE,'shuffle': True,'num_workers': 0}
test_params = {'batch_size': VALID_BATCH_SIZE,'shuffle': True,'num_workers': 0}
training_loader = DataLoader(training_set, **train_params)
testing_loader = DataLoader(testing_set, **test_params)


class BERTClass(torch.nn.Module):
    def __init__(self):
        super(BERTClass, self).__init__()
        self.bert = BertModel.from_pretrained('bert-base-uncased', return_dict=False, num_labels=2)
        self.lstm = nn.LSTM(768, 256, batch_first=True, bidirectional=True)
        self.linear = nn.Linear(256 * 2, 2)

    def forward(self, ids, mask, token_type_ids):
        # with return_dict=False, BertModel returns (sequence_output, pooled_output)
        sequence_output, pooled_output = self.bert(ids, attention_mask=mask, token_type_ids=token_type_ids)
        # run the BiLSTM over all token embeddings ([batch, seq_len, 768])
        lstm_output, (h, c) = self.lstm(sequence_output)
        # last forward-direction state and first backward-direction state, concatenated
        # (currently computed but not used below)
        hidden = torch.cat((lstm_output[:, -1, :256], lstm_output[:, 0, 256:]), dim=-1)
        # the classifier head uses the last timestep of lstm_output -> [batch, 2] logits
        linear_output = self.linear(lstm_output[:, -1].view(-1, 256 * 2))

        return linear_output

model = BERTClass()
model.to(device)
print(model)
def loss_fn(outputs, targets):
    return torch.nn.BCEWithLogitsLoss()(outputs, targets)

optimizer = torch.optim.Adam(params=model.parameters(), lr=LEARNING_RATE)

def train(epoch):
    model.train()
    for batch_idx, data in enumerate(training_loader):
        ids = data['ids'].to(device, dtype=torch.long)
        mask = data['mask'].to(device, dtype=torch.long)
        token_type_ids = data['token_type_ids'].to(device, dtype=torch.long)
        targets = data['targets'].to(device, dtype=torch.float)

        outputs = model(ids, mask, token_type_ids)
        loss = loss_fn(outputs, targets)
        if batch_idx % 5000 == 0:
            print(f'Epoch: {epoch}, Loss: {loss.item()}')

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

for epoch in range(EPOCHS):
    train(epoch)

With the above code I run into the error `Target size (torch.Size([8])) must be the same as input size (torch.Size([8, 2]))`. I checked online and tried `targets = targets.unsqueeze(2)`, but then I get another error saying the dimension passed to `unsqueeze` must be in the range [-2, 1]. I also tried to modify the loss function to

def loss_fn(outputs, targets):
    return torch.nn.BCELoss()(outputs, targets)

but I still receive the same error. Can someone advise how to solve this, or what I should change to make it work? Many thanks in advance.
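To make the shape problem concrete, here is a minimal, self-contained sketch (no BERT, just random tensors with the shapes my code produces) of the two options I think would make the shapes consistent; I'm not sure which is more appropriate here:

import torch
import torch.nn as nn

batch_size, num_labels = 8, 2
logits_2col = torch.randn(batch_size, num_labels)          # what my model outputs: [8, 2]
targets = torch.tensor([0., 1., 1., 0., 1., 0., 0., 1.])   # what my dataset returns: [8]

# Option A: CrossEntropyLoss expects [batch, num_classes] logits and
# [batch] integer class indices, so the 2-column output can stay as is.
loss_a = nn.CrossEntropyLoss()(logits_2col, targets.long())

# Option B: BCEWithLogitsLoss expects input and target of the SAME shape,
# so the final layer would have to emit a single logit per example
# (nn.Linear(256 * 2, 1)) and the targets be reshaped to [8, 1].
logits_1col = torch.randn(batch_size, 1)
loss_b = nn.BCEWithLogitsLoss()(logits_1col, targets.unsqueeze(1))

print(loss_a.item(), loss_b.item())

(With Option A the targets in CustomDataset would presumably need dtype=torch.long instead of torch.float.)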

  • Which line gives this error? – Ashwin Geet D'Sa May 04 '21 at 08:33
  • The call to loss_fn, i.e. `loss = loss_fn(outputs, targets)`, and inside it `return torch.nn.BCEWithLogitsLoss()(outputs, targets)`. I tried with BCELoss and it is the same error. – Qs13 May 04 '21 at 10:25
  • Since it is binary classification (1 = positive, 0 = negative), is it best to use BertForSequenceClassification, or can I also use BertModel? I tried using BertForSequenceClassification but I have some issues in the forward part. Could you advise, please? – Qs13 May 06 '21 at 11:41
  • What is the error that you get in the `forward()`? – Ashwin Geet D'Sa May 06 '21 at 11:58
  • In the previous case BCE loss isn't working because your output dimension is 2 instead of 1. – Ashwin Geet D'Sa May 06 '21 at 11:59
  • You can use `CrossEntropyLoss` instead. – Ashwin Geet D'Sa May 06 '21 at 12:00
  • So if I change to BertForSequenceClassification, which I think is more suitable for my task, I get the error `ValueError: not enough values to unpack (expected 2, got 1)` on this line: `sequence_output, pooled_output = self.bert(ids, attention_mask=mask, token_type_ids=token_type_ids)` (see the sketch after these comments). – Qs13 May 06 '21 at 12:17
  • Use CrossEntropyLoss. – Ashwin Geet D'Sa May 06 '21 at 12:29
  • @AshwinGeetD'Sa On `nn.Linear(256*2, 2)`, should the out features be the number of labels, i.e. 2 (positive or negative), or should it be 1, since each sentence is assigned one of the 2 labels? – Qs13 May 07 '21 at 09:54
  • Referencing this - https://stackoverflow.com/questions/71318599/bert-classifier-valueerror-target-size-torch-size4-1-must-be-the-same-as – abby37 Jul 03 '22 at 21:09
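To illustrate the unpacking error mentioned in my comment above, here is a minimal sketch of what I believe is happening (assuming a recent transformers 4.x release): with return_dict=False, BertModel returns the pair (sequence_output, pooled_output), while BertForSequenceClassification called without labels returns a tuple containing only the logits, so unpacking it into two variables fails:

import torch
from transformers import BertModel, BertForSequenceClassification

ids = torch.randint(0, 30522, (2, 16))   # dummy token ids: batch of 2, seq len 16
mask = torch.ones_like(ids)

# BertModel: two-variable unpacking works, and sequence_output ([2, 16, 768])
# is what the BiLSTM consumes.
base = BertModel.from_pretrained('bert-base-uncased', return_dict=False)
sequence_output, pooled_output = base(ids, attention_mask=mask)
print(sequence_output.shape, pooled_output.shape)

# BertForSequenceClassification without labels: the returned tuple holds only
# the logits, hence "not enough values to unpack (expected 2, got 1)".
clf = BertForSequenceClassification.from_pretrained(
    'bert-base-uncased', num_labels=2, return_dict=False)
(logits,) = clf(ids, attention_mask=mask)
print(logits.shape)                       # [2, 2]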

0 Answers