The dataset is CIFAR10. I've created a VGG-like network:

class FirstModel(nn.Module):
    def __init__(self):
        super(FirstModel, self).__init__()
        self.vgg1 = nn.Sequential(
            nn.Conv2d(3, 16, 3, padding=1),
            nn.BatchNorm2d(16),
            nn.ReLU(),
            nn.Conv2d(16, 16, 3, padding=1),
            nn.BatchNorm2d(16),
            nn.ReLU(),
            nn.MaxPool2d(2,2),
            nn.Dropout(0.2)
        )

        self.vgg2 = nn.Sequential(
            nn.Conv2d(16, 32, 3, padding=1),
            nn.BatchNorm2d(32),
            nn.ReLU(),
            nn.Conv2d(32, 32, 3, padding=1),
            nn.BatchNorm2d(32),
            nn.ReLU(),
            nn.MaxPool2d(2,2),
            nn.Dropout(0.2)
        )

        self.vgg3 = nn.Sequential(
            nn.Conv2d(32, 64, 3, padding=1),
            nn.BatchNorm2d(64),
            nn.ReLU(),
            nn.Conv2d(64, 64, 3, padding=1),
            nn.BatchNorm2d(64),
            nn.ReLU(),
            nn.MaxPool2d(2,2),
            nn.Dropout(0.2)
        )

        self.fc1 = nn.Linear(4 * 4 * 64, 4096)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(4096, 4096)
        self.fc3 = nn.Linear(4096, 10)
        self.softmax = nn.Softmax()
        self.dropout = nn.Dropout(0.5)


    def forward(self, x):
        x = self.vgg3(self.vgg2(self.vgg1(x)))
        x = nn.Flatten()(x)
        x = self.relu(self.fc1(x))
        x = self.dropout(x)
        x = self.relu(self.fc2(x))
        x = self.dropout(x)
        x = self.softmax(self.fc3(x))

        return x

Then I train it and visualize loss and accuracy:

import numpy as np
import matplotlib.pyplot as plt
from IPython.display import clear_output

def plot_history(train_history, val_history, title='loss'):
    plt.figure()
    plt.title('{}'.format(title))
    plt.plot(train_history, label='train', zorder=1)
    
    points = np.array(val_history)
    steps = list(range(0, len(train_history) + 1, int(len(train_history) / len(val_history))))[1:]
    
    plt.scatter(steps, val_history, marker='*', s=180, c='red', label='val', zorder=2)
    plt.xlabel('train steps')
    
    plt.legend(loc='best')
    plt.grid()

    plt.show()

def train_model(model, optimizer, train_dataloader, test_dataloader):
    criterion = nn.CrossEntropyLoss()

    train_loss_log = []
    train_acc_log = []
    val_loss_log = []
    val_acc_log = []

    for epoch in range(NUM_EPOCH):

        # training block
        model.train()

        train_loss = 0.
        train_size = 0
        train_acc = 0.

        for inputs, labels in train_dataloader:
            inputs, labels = inputs.to(device), labels.to(device)

            optimizer.zero_grad()
            y_pred = model(inputs)

            loss = criterion(y_pred, labels)
            loss.backward()
            optimizer.step()

            train_loss += loss.item()
            train_size += y_pred.size(0)
            train_loss_log.append(loss.data / y_pred.size(0))

            _, pred_classes = torch.max(y_pred, 1)
            train_acc += (pred_classes == labels).sum().item()
            train_acc_log.append(np.mean((pred_classes == labels).cpu().numpy()))

        # validation block
        val_loss = 0.
        val_size = 0
        val_acc = 0.

        model.eval()
        with torch.no_grad():
            for inputs, labels in test_dataloader:
                inputs, labels = inputs.to(device), labels.to(device)
                y_pred = model(inputs)
                loss = criterion(y_pred, labels)
                val_loss += loss.item()
                val_size += y_pred.size(0)

                _, pred_classes = torch.max(y_pred, 1)
                val_acc += (pred_classes == labels).sum().item()

        val_loss_log.append(val_loss / val_size)
        val_acc_log.append(val_acc / val_size)

        clear_output()
        plot_history(train_loss_log, val_loss_log, 'loss')
        plot_history(train_acc_log, val_acc_log, 'accuracy')

        print('Train loss:', train_loss / train_size)
        print('Train acc:', train_acc / train_size)
        print('Val loss:', val_loss / val_size)
        print('Val acc:', val_acc / val_size)

Then I train the model:

first_model = FirstModel()
first_model.to(device)

optimizer = optim.RMSprop(first_model.parameters(), lr=0.001, momentum=0.9)

train_model(first_model, optimizer, train_dataloader, test_dataloader)

The loss and accuracy do not change (accuracy stays at around 0.1). However, if the optimizer is SGD with momentum, everything works fine (loss and accuracy change). I've already tried changing the momentum and learning rate, but it does not help.

What should be fixed? I'd be grateful for any advice!

Sunny Duckling

3 Answers


Try decreasing the learning rate further. If that still has no effect on the accuracy and loss, change the optimizer to Adam or something else and experiment with different learning rates.
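
For example, a minimal sketch of that kind of swap, reusing the train_model and data loaders from the question (the choice of Adam and the lr value here are only illustrative, not prescriptions from this answer):

import torch.optim as optim

# Illustrative alternative: Adam with a smaller learning rate.
optimizer = optim.Adam(first_model.parameters(), lr=1e-4)
train_model(first_model, optimizer, train_dataloader, test_dataloader)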


First of all, you don't need the softmax in the model: nn.CrossEntropyLoss already applies log-softmax internally, so the model should return raw logits. I also suspect that RMSprop doesn't work well with momentum here.
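
A minimal sketch of what the corrected forward could look like, assuming the rest of FirstModel stays exactly as in the question; the only change is returning raw logits instead of softmax probabilities:

    def forward(self, x):
        x = self.vgg3(self.vgg2(self.vgg1(x)))
        x = nn.Flatten()(x)
        x = self.relu(self.fc1(x))
        x = self.dropout(x)
        x = self.relu(self.fc2(x))
        x = self.dropout(x)
        # Return raw logits; nn.CrossEntropyLoss applies log-softmax internally.
        return self.fc3(x)

If you need actual probabilities at inference time, apply torch.softmax(logits, dim=1) outside the loss computation. You can also try RMSprop without the momentum argument, e.g. optim.RMSprop(first_model.parameters(), lr=1e-4).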

Krueger

In my case, I was facing the same problem. On my laptop without a GPU the training was fine. When I tried it on a GPU, the model's accuracy and loss didn't change after the first epoch. I was using nn.CrossEntropyLoss() with Adam. Replacing Adam with SGD worked for me.
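
For reference, a minimal sketch of that optimizer swap on the model from the question (the lr and momentum values below are only illustrative):

import torch.optim as optim

# Replace Adam with plain SGD + momentum (values are illustrative).
optimizer = optim.SGD(first_model.parameters(), lr=0.01, momentum=0.9)
train_model(first_model, optimizer, train_dataloader, test_dataloader)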

M.Naveed Riaz