-1

i am traing to buld bert model text clasifiction by using bert and distilbert and roberta in the same time. this is model arch

class BERT_Arch(nn.Module):
    def __init__(self, bert):
      super(BERT_Arch, self).__init__()
      self.bert = bert
      self.dropout = nn.Dropout(0.1)
      self.relu =  nn.ReLU()
      self.fc1 = nn.Linear(768,512)
      self.fc2 = nn.Linear(512,2)
      self.softmax = nn.LogSoftmax(dim=1)
    def forward(self, sent_id, mask):
      cls_hs = self.bert(sent_id, attention_mask=mask)["last_hidden_state"]
      print(cls_hs.shape)
      x = self.fc1(cls_hs)
      x = self.relu(x)
      x = self.dropout(x)
      x = self.fc2(x)
      x = self.softmax(x)
      return x

and this is train func

def train():
  model.train()
  total_loss, total_accuracy = 0, 0

  for step,batch in enumerate(train_dataloader):
    if step % 50 == 0 and not step == 0:
      print('  Batch {:>5,}  of  {:>5,}.'.format(step, len(train_dataloader)))
    batch = [r for r in batch]
    sent_id, mask, labels = batch
    model.zero_grad()
    preds = model(sent_id, mask)
    print(preds.shape)
    loss = cross_entropy(preds, labels)

    total_loss = total_loss + loss.item()
    loss.backward()
    torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
    optimizer.step()
    preds=preds.detach().cpu().numpy()

  avg_loss = total_loss / len(train_dataloader)

  return avg_loss

when i use bert or roberta i shoud change ["last_hidden_state"] to ["pooler_output"] its work so good but when i use distilbert and change to ["last_hidden_state"] i got this error

---------------------------------------------------------------------------
RuntimeError                              Traceback (most recent call last)
<ipython-input-25-ce6e6039f1a1> in <cell line: 8>()
      8 for epoch in range(epochs):
      9     print('\n Epoch {:} / {:}'.format(epoch + 1, epochs))
---> 10     train_loss = train()
     11     valid_loss = evaluate()
     12     if valid_loss < best_valid_loss:

3 frames
<ipython-input-24-1beab048b9a5> in train()
     12     preds = model(sent_id, mask)
     13     print(preds.shape)
---> 14     loss = cross_entropy(preds, labels)
     15 
     16     total_loss = total_loss + loss.item()

/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py in _call_impl(self, *args, **kwargs)
   1499                 or _global_backward_pre_hooks or _global_backward_hooks
   1500                 or _global_forward_hooks or _global_forward_pre_hooks):
-> 1501             return forward_call(*args, **kwargs)
   1502         # Do not call functions when jit is used
   1503         full_backward_hooks, non_full_backward_hooks = [], []

/usr/local/lib/python3.10/dist-packages/torch/nn/modules/loss.py in forward(self, input, target)
    214 
    215     def forward(self, input: Tensor, target: Tensor) -> Tensor:
--> 216         return F.nll_loss(input, target, weight=self.weight, ignore_index=self.ignore_index, reduction=self.reduction)
    217 
    218 

/usr/local/lib/python3.10/dist-packages/torch/nn/functional.py in nll_loss(input, target, weight, size_average, ignore_index, reduce, reduction)
   2702     if size_average is not None or reduce is not None:
   2703         reduction = _Reduction.legacy_get_string(size_average, reduce)
-> 2704     return torch._C._nn.nll_loss_nd(input, target, weight, _Reduction.get_enum(reduction), ignore_index)
   2705 
   2706 

RuntimeError: Expected target size [32, 2], got [32]

plz some one help me to fix the error model i use dbmdz/bert-base-turkish-uncased distilbert-base-uncased urakaytan/roberta-base-turkish-uncased

i try to bulld bert model i try with bert and roberta its work but with distil is not working and i dont know why

1 Answers1

0

i solev it cls_hs = self.bert(sent_id, attention_mask=mask)["last_hidden_state"] shoud be

output_1 = self.bert(sent_id, attention_mask=mask)
  hidden_state = output_1[0]
  poler = hidden_state[:, 0]