I am a beginner at this, so please help me find a solution. I use RobertaTokenizerFast to tokenize the text and the summary (max token lengths 200 and 50, respectively). The plan is to use RoBERTa as the first layer and then condense its output to the target summary length with Conv2d, max_pool2d, and a dense layer. Since the last dense layer outputs a float vector, I normalize the target vector of (long) input_ids into float values between 0 and 1. Finally, I compute the loss with CrossEntropyLoss.
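Roughly, the tokenization looks like this (a simplified sketch; the variables text and summary and the padding/truncation settings stand in for my actual dataset code):

    from transformers import RobertaTokenizerFast

    tokenizer = RobertaTokenizerFast.from_pretrained('roberta-base')

    # source text: up to 200 tokens
    text_encoding = tokenizer(
        text,
        max_length=200,
        padding='max_length',
        truncation=True,
        return_tensors='pt',
    )

    # target summary: up to 50 tokens
    summary_encoding = tokenizer(
        summary,
        max_length=50,
        padding='max_length',
        truncation=True,
        return_tensors='pt',
    )

    # each batch then contains:
    #   text_input_ids, text_attention_mask   -> shape [BATCH_SIZE, 200]
    #   labels (= summary input_ids), labels_attention_mask -> shape [BATCH_SIZE, 50]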
    import torch
    import torch.nn.functional as F
    import pytorch_lightning as pl
    from torch.optim import AdamW
    from transformers import RobertaModel

    BATCH_SIZE = 20

    class Summarizer(pl.LightningModule):
        def __init__(self):
            super().__init__()
            self.roberta = RobertaModel.from_pretrained(
                'roberta-base', return_dict=True, is_decoder=True, use_cache=False)
            # the conv layer treats the batch dimension as its input channels
            self.convlayer = torch.nn.Conv2d(in_channels=BATCH_SIZE, out_channels=1, kernel_size=4)
            self.relu = torch.nn.ReLU()
            # 97 * 381 is the flattened size after the conv + max-pool below
            self.fc = torch.nn.Linear(in_features=97 * 381, out_features=50)
            self.cross_entropy_loss = torch.nn.CrossEntropyLoss()

        def forward(self, input_ids, attention_mask, labels):
            output = self.roberta(input_ids=input_ids, attention_mask=attention_mask)
            x = output['last_hidden_state']
            # add a leading dim so the batch dimension becomes the conv channels
            x = torch.unsqueeze(x, 0)
            x = self.convlayer(x)
            x = self.relu(x)
            x = F.max_pool2d(x, kernel_size=4, stride=2)
            # flatten and project down to the summary length (50)
            x = x.squeeze().flatten()
            x = self.fc(x)
            output = self.relu(x)
            crossent_loss = self.cross_entropy_loss(labels, output)
            return crossent_loss, output

        def training_step(self, batch, batch_idx):
            input_ids = batch['text_input_ids']
            attention_mask = batch['text_attention_mask']
            l = batch['labels'].float()
            l = torch.tensor(l / torch.linalg.norm(l))   # normalized labels in (0, 1)
            labels = l
            labels_attention_mask = batch['labels_attention_mask']
            loss, outputs = self(
                input_ids=input_ids,
                attention_mask=attention_mask,
                labels=labels
            )
            self.log('train_loss', loss, prog_bar=True, logger=True)
            return loss

        def validation_step(self, batch, batch_idx):
            input_ids = batch['text_input_ids']
            attention_mask = batch['text_attention_mask']
            l = batch['labels'].float()
            l = torch.tensor(l / torch.linalg.norm(l))
            labels = l
            labels_attention_mask = batch['labels_attention_mask']
            loss, outputs = self(
                input_ids=input_ids,
                attention_mask=attention_mask,
                labels=labels
            )
            self.log('val_loss', loss, prog_bar=True, logger=True)
            return loss

        def test_step(self, batch, batch_idx):
            input_ids = batch['text_input_ids']
            attention_mask = batch['text_attention_mask']
            l = batch['labels'].float()
            l = torch.tensor(l / torch.linalg.norm(l))
            labels = l
            labels_attention_mask = batch['labels_attention_mask']
            loss, outputs = self(
                input_ids=input_ids,
                attention_mask=attention_mask,
                labels=labels
            )
            self.log('test_loss', loss, prog_bar=True, logger=True)
            return loss

        def configure_optimizers(self):
            return AdamW(self.parameters(), lr=0.0001)
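To show where the 97 * 381 in the dense layer comes from, here is how I trace the shapes through forward with a dummy batch (a sketch; 768 is roberta-base's hidden size and 200 is the text max length from the tokenizer):

    import torch
    import torch.nn.functional as F

    model = Summarizer()
    input_ids = torch.randint(0, model.roberta.config.vocab_size, (BATCH_SIZE, 200))
    attention_mask = torch.ones(BATCH_SIZE, 200, dtype=torch.long)

    with torch.no_grad():
        x = model.roberta(input_ids=input_ids, attention_mask=attention_mask)['last_hidden_state']
        print(x.shape)                                  # torch.Size([20, 200, 768])
        x = model.relu(model.convlayer(torch.unsqueeze(x, 0)))
        print(x.shape)                                  # torch.Size([1, 1, 197, 765])
        x = F.max_pool2d(x, kernel_size=4, stride=2)
        print(x.shape)                                  # torch.Size([1, 1, 97, 381])
        x = model.fc(x.squeeze().flatten())
        print(x.shape)                                  # torch.Size([50])
    # ...while batch['labels'] (the normalized summary ids) has shape [20, 50]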
Training with pl.Trainer then fails with

ValueError: Expected input batch_size (20) to match target batch_size (50).

I cannot figure out where this batch-size mismatch comes from. What am I doing wrong?
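My real DataModule feeds in the tokenized batches described above; this stripped-down version with random token ids (the dataset and loader names here are placeholders, not my actual code) reproduces the error:

    from torch.utils.data import DataLoader, Dataset

    class DummySummaryDataset(Dataset):
        # placeholder dataset with random ids, shaped like my real batches
        def __len__(self):
            return 100
        def __getitem__(self, idx):
            return {
                'text_input_ids': torch.randint(0, 50265, (200,)),
                'text_attention_mask': torch.ones(200, dtype=torch.long),
                'labels': torch.randint(0, 50265, (50,)),
                'labels_attention_mask': torch.ones(50, dtype=torch.long),
            }

    train_loader = DataLoader(DummySummaryDataset(), batch_size=BATCH_SIZE)
    val_loader = DataLoader(DummySummaryDataset(), batch_size=BATCH_SIZE)

    model = Summarizer()
    trainer = pl.Trainer(max_epochs=1)
    trainer.fit(model, train_loader, val_loader)   # raises the ValueError above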