i am traing to buld bert model text clasifiction by using bert and distilbert and roberta in the same time. this is model arch
class BERT_Arch(nn.Module):
def __init__(self, bert):
super(BERT_Arch, self).__init__()
self.bert = bert
self.dropout = nn.Dropout(0.1)
self.relu = nn.ReLU()
self.fc1 = nn.Linear(768,512)
self.fc2 = nn.Linear(512,2)
self.softmax = nn.LogSoftmax(dim=1)
def forward(self, sent_id, mask):
cls_hs = self.bert(sent_id, attention_mask=mask)["last_hidden_state"]
print(cls_hs.shape)
x = self.fc1(cls_hs)
x = self.relu(x)
x = self.dropout(x)
x = self.fc2(x)
x = self.softmax(x)
return x
and this is train func
def train():
model.train()
total_loss, total_accuracy = 0, 0
for step,batch in enumerate(train_dataloader):
if step % 50 == 0 and not step == 0:
print(' Batch {:>5,} of {:>5,}.'.format(step, len(train_dataloader)))
batch = [r for r in batch]
sent_id, mask, labels = batch
model.zero_grad()
preds = model(sent_id, mask)
print(preds.shape)
loss = cross_entropy(preds, labels)
total_loss = total_loss + loss.item()
loss.backward()
torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
optimizer.step()
preds=preds.detach().cpu().numpy()
avg_loss = total_loss / len(train_dataloader)
return avg_loss
when i use bert or roberta i shoud change ["last_hidden_state"] to ["pooler_output"] its work so good but when i use distilbert and change to ["last_hidden_state"] i got this error
---------------------------------------------------------------------------
RuntimeError Traceback (most recent call last)
<ipython-input-25-ce6e6039f1a1> in <cell line: 8>()
8 for epoch in range(epochs):
9 print('\n Epoch {:} / {:}'.format(epoch + 1, epochs))
---> 10 train_loss = train()
11 valid_loss = evaluate()
12 if valid_loss < best_valid_loss:
3 frames
<ipython-input-24-1beab048b9a5> in train()
12 preds = model(sent_id, mask)
13 print(preds.shape)
---> 14 loss = cross_entropy(preds, labels)
15
16 total_loss = total_loss + loss.item()
/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py in _call_impl(self, *args, **kwargs)
1499 or _global_backward_pre_hooks or _global_backward_hooks
1500 or _global_forward_hooks or _global_forward_pre_hooks):
-> 1501 return forward_call(*args, **kwargs)
1502 # Do not call functions when jit is used
1503 full_backward_hooks, non_full_backward_hooks = [], []
/usr/local/lib/python3.10/dist-packages/torch/nn/modules/loss.py in forward(self, input, target)
214
215 def forward(self, input: Tensor, target: Tensor) -> Tensor:
--> 216 return F.nll_loss(input, target, weight=self.weight, ignore_index=self.ignore_index, reduction=self.reduction)
217
218
/usr/local/lib/python3.10/dist-packages/torch/nn/functional.py in nll_loss(input, target, weight, size_average, ignore_index, reduce, reduction)
2702 if size_average is not None or reduce is not None:
2703 reduction = _Reduction.legacy_get_string(size_average, reduce)
-> 2704 return torch._C._nn.nll_loss_nd(input, target, weight, _Reduction.get_enum(reduction), ignore_index)
2705
2706
RuntimeError: Expected target size [32, 2], got [32]
plz some one help me to fix the error model i use dbmdz/bert-base-turkish-uncased distilbert-base-uncased urakaytan/roberta-base-turkish-uncased
i try to bulld bert model i try with bert and roberta its work but with distil is not working and i dont know why