How do I fix the UserWarning: Using a target size (torch.Size([1])) that is different to the input size (torch.Size([])) is deprecated

Question

When I try to run my model, I get this UserWarning. How do I go about fixing this? My model's input is expecting (torch.Size([])), a scalar, but my target is torch.Size([1])). Should I be changed my input size or my target size? Any help would be greatly appreciated. Thanks!

UserWarning Message:

/opt/conda/lib/python3.7/site-packages/ipykernel_launcher.py:34: UserWarning: Using a target size (torch.Size([1])) that is different to the input size (torch.Size([])) is deprecated. Please ensure they have the same size.

Model:

TextClassifier(
  (dropout): Dropout(p=0.25, inplace=False)
  (embedding): Embedding(5001, 64, padding_idx=0)
  (conv_1): Conv1d(75, 32, kernel_size=(2,), stride=(2,))
  (conv_2): Conv1d(75, 32, kernel_size=(3,), stride=(2,))
  (conv_3): Conv1d(75, 32, kernel_size=(4,), stride=(2,))
  (conv_4): Conv1d(75, 32, kernel_size=(5,), stride=(2,))
  (pool_1): MaxPool1d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (pool_2): MaxPool1d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
  (pool_3): MaxPool1d(kernel_size=4, stride=2, padding=0, dilation=1, ceil_mode=False)
  (pool_4): MaxPool1d(kernel_size=5, stride=2, padding=0, dilation=1, ceil_mode=False)
  (fc): Linear(in_features=1856, out_features=1, bias=True)
)

Code for my model:

class TextClassifier(nn.ModuleList):

    def __init__(self, params):
        super(TextClassifier, self).__init__()

        # Parameters regarding text preprocessing
        self.seq_len = params.seq_len
        self.num_words = params.num_words
        self.embedding_size = params.embedding_size
      
        # Dropout definition
        self.dropout = nn.Dropout(0.25)
      
        # CNN parameters definition
        # Kernel sizes
        self.kernel_1 = 2
        self.kernel_2 = 3
        self.kernel_3 = 4
        self.kernel_4 = 5
      
      # Output size for each convolution
        self.out_size = params.out_size
      # Number of strides for each convolution
        self.stride = params.stride
      
      # Embedding layer definition
        self.embedding = nn.Embedding(self.num_words + 1, self.embedding_size, padding_idx=0)
      
      # Convolution layers definition
        self.conv_1 = nn.Conv1d(self.seq_len, self.out_size, self.kernel_1, self.stride)
        self.conv_2 = nn.Conv1d(self.seq_len, self.out_size, self.kernel_2, self.stride)
        self.conv_3 = nn.Conv1d(self.seq_len, self.out_size, self.kernel_3, self.stride)
        self.conv_4 = nn.Conv1d(self.seq_len, self.out_size, self.kernel_4, self.stride)
      
      # Max pooling layers definition
        self.pool_1 = nn.MaxPool1d(self.kernel_1, self.stride)
        self.pool_2 = nn.MaxPool1d(self.kernel_2, self.stride)
        self.pool_3 = nn.MaxPool1d(self.kernel_3, self.stride)
        self.pool_4 = nn.MaxPool1d(self.kernel_4, self.stride)
      
      # Fully connected layer definition
        self.fc = nn.Linear(self.in_features_fc(), 1)


    def in_features_fc(self):
        '''Calculates the number of output features after Convolution + Max pooling
        Convolved_Features = ((embedding_size + (2 * padding) - dilation * (kernel - 1) - 1) / stride) + 1
        Pooled_Features = ((embedding_size + (2 * padding) - dilation * (kernel - 1) - 1) / stride) + 1

        source: https://pytorch.org/docs/stable/generated/torch.nn.Conv1d.html
        '''
        # Calculate size of convolved/pooled features for convolution_1/max_pooling_1 features
        out_conv_1 = ((self.embedding_size - 1 * (self.kernel_1 - 1) - 1) / self.stride) + 1
        out_conv_1 = math.floor(out_conv_1)
        out_pool_1 = ((out_conv_1 - 1 * (self.kernel_1 - 1) - 1) / self.stride) + 1
        out_pool_1 = math.floor(out_pool_1)

        # Calculate size of convolved/pooled features for convolution_2/max_pooling_2 features
        out_conv_2 = ((self.embedding_size - 1 * (self.kernel_2 - 1) - 1) / self.stride) + 1
        out_conv_2 = math.floor(out_conv_2)
        out_pool_2 = ((out_conv_2 - 1 * (self.kernel_2 - 1) - 1) / self.stride) + 1
        out_pool_2 = math.floor(out_pool_2)

        # Calculate size of convolved/pooled features for convolution_3/max_pooling_3 features
        out_conv_3 = ((self.embedding_size - 1 * (self.kernel_3 - 1) - 1) / self.stride) + 1
        out_conv_3 = math.floor(out_conv_3)
        out_pool_3 = ((out_conv_3 - 1 * (self.kernel_3 - 1) - 1) / self.stride) + 1
        out_pool_3 = math.floor(out_pool_3)

        # Calculate size of convolved/pooled features for convolution_4/max_pooling_4 features
        out_conv_4 = ((self.embedding_size - 1 * (self.kernel_4 - 1) - 1) / self.stride) + 1
        out_conv_4 = math.floor(out_conv_4)
        out_pool_4 = ((out_conv_4 - 1 * (self.kernel_4 - 1) - 1) / self.stride) + 1
        out_pool_4 = math.floor(out_pool_4)

        # Returns "flattened" vector (input for fully connected layer)
        return (out_pool_1 + out_pool_2 + out_pool_3 + out_pool_4) * self.out_size

    
    
    def forward(self, x):

        # Sequence of tokes is filterd through an embedding layer
        x = self.embedding(x)

        # Convolution layer 1 is applied
        x1 = self.conv_1(x)
        x1 = torch.relu(x1)
        x1 = self.pool_1(x1)

        # Convolution layer 2 is applied
        x2 = self.conv_2(x)
        x2 = torch.relu((x2))
        x2 = self.pool_2(x2)

        # Convolution layer 3 is applied
        x3 = self.conv_3(x)
        x3 = torch.relu(x3)
        x3 = self.pool_3(x3)

        # Convolution layer 4 is applied
        x4 = self.conv_4(x)
        x4 = torch.relu(x4)
        x4 = self.pool_4(x4)

        # The output of each convolutional layer is concatenated into a unique vector
        union = torch.cat((x1, x2, x3, x4), 2)
        union = union.reshape(union.size(0), -1)

        # The "flattened" vector is passed through a fully connected layer
        out = self.fc(union)
        # Dropout is applied
        out = self.dropout(out)
        # Activation function is applied
        out = torch.sigmoid(out)

        return out.squeeze()

Where I call model/train it:

# Initialize dataset maper
train = DatasetMaper(df_train['text_padded'], df_train['Target'])
test = DatasetMaper(df_test['text_padded'], df_test['Target'])
    
# Initialize loaders
loader_train = DataLoader(train, batch_size=Parameters.batch_size)
loader_test = DataLoader(test, batch_size=Parameters.batch_size)
print('DataLoaders initialized.')

# Define optimizer
optimizer = optim.RMSprop(model.parameters(), lr=Parameters.learning_rate)
print('Done. Everything loaded. Preparing to train model.')

# Starts training phase
for epoch in range(Parameters.epochs):
    print('Epoch: ', epoch+1)
    # Set model in training model
    model.train()
    predictions = []
        # Starts batch training
    for x_batch, y_batch in loader_train:
        #print(x_batch)
        x_batch = torch.tensor(x_batch)
        #print(x_batch)
        
        y_batch = y_batch.type(torch.FloatTensor)
        #print(y_batch)
        
        # Feed the model
        y_pred = model(x_batch)
        #print(y_pred.detach().numpy())

        # Loss calculation
        loss = F.binary_cross_entropy(y_pred, y_batch)

        # Clean gradientes
        optimizer.zero_grad()

        # Gradients calculation
        loss.backward()

        # Gradients update
        optimizer.step()
        
        #if epoch % 1000==1:
            #print(y_pred.detach().numpy())
        # Save predictions
        predictions += y_pred.detach().numpy()

    # Evaluation phase
    test_predictions = Run.evaluation(model, loader_test)

    # Metrics calculation
    train_accuary = Run.calculate_accuray(df_train['Target'], predictions)
    test_accuracy = Run.calculate_accuray(df_test['Target'], test_predictions)
    print("Epoch: %d, loss: %.5f, Train accuracy: %.5f, Test accuracy: %.5f" % (epoch+1, loss.item(), train_accuary, test_accuracy))

could you print ```y_pred.shape```and y_batch.shape``` before passing it into the loss function and tell me what you get? — Theodor Peifer, Dec 03 '20 at 09:37

score 0 · Answer 1 · edited Feb 26 '21 at 01:43

0

I had a similar problem using the same code as yours while trying to run my database into it. I managed to make it work by changing the batch_size in the Parameters dataclass. For some reason, my data works only with an odd number for batch_size.

edited Feb 26 '21 at 01:43

Allen M

1,423
9
15

answered Feb 26 '21 at 01:08

vr4u

31
5

How do I fix the UserWarning: Using a target size (torch.Size([1])) that is different to the input size (torch.Size([])) is deprecated

1 Answers1