The dataset is CIFAR10. I've created a VGG-like network:

class FirstModel(nn.Module):
    def __init__(self):
        super(FirstModel, self).__init__()
        self.vgg1 = nn.Sequential(
            nn.Conv2d(3, 16, 3, padding=1),
            nn.BatchNorm2d(16),
            nn.ReLU(),
            nn.Conv2d(16, 16, 3, padding=1),
            nn.BatchNorm2d(16),
            nn.ReLU(),
            nn.MaxPool2d(2,2),
            nn.Dropout(0.2)
        )

        self.vgg2 = nn.Sequential(
            nn.Conv2d(16, 32, 3, padding=1),
            nn.BatchNorm2d(32),
            nn.ReLU(),
            nn.Conv2d(32, 32, 3, padding=1),
            nn.BatchNorm2d(32),
            nn.ReLU(),
            nn.MaxPool2d(2,2),
            nn.Dropout(0.2)
        )

        self.vgg3 = nn.Sequential(
            nn.Conv2d(32, 64, 3, padding=1),
            nn.BatchNorm2d(64),
            nn.ReLU(),
            nn.Conv2d(64, 64, 3, padding=1),
            nn.BatchNorm2d(64),
            nn.ReLU(),
            nn.MaxPool2d(2,2),
            nn.Dropout(0.2)
        )

        self.fc1 = nn.Linear(4 * 4 * 64, 4096)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(4096, 4096)
        self.fc3 = nn.Linear(4096, 10)
        self.softmax = nn.Softmax()
        self.dropout = nn.Dropout(0.5)


    def forward(self, x):
        x = self.vgg3(self.vgg2(self.vgg1(x)))
        x = nn.Flatten()(x)
        x = self.relu(self.fc1(x))
        x = self.dropout(x)
        x = self.relu(self.fc2(x))
        x = self.dropout(x)
        x = self.softmax(self.fc3(x))

        return x

Then I train it and visualize loss and accuracy:

import numpy as np
import matplotlib.pyplot as plt
from IPython.display import clear_output

def plot_history(train_history, val_history, title='loss'):
    plt.figure()
    plt.title('{}'.format(title))
    plt.plot(train_history, label='train', zorder=1)
    
    points = np.array(val_history)
    steps = list(range(0, len(train_history) + 1, int(len(train_history) / len(val_history))))[1:]
    
    plt.scatter(steps, val_history, marker='*', s=180, c='red', label='val', zorder=2)
    plt.xlabel('train steps')
    
    plt.legend(loc='best')
    plt.grid()

    plt.show()

def train_model(model, optimizer, train_dataloader, test_dataloader):
    criterion = nn.CrossEntropyLoss()

    train_loss_log = []
    train_acc_log = []
    val_loss_log = []
    val_acc_log = []

    for epoch in range(NUM_EPOCH):

        # training block
        model.train()

        train_loss = 0.
        train_size = 0
        train_acc = 0.

        for inputs, labels in train_dataloader:
            inputs, labels = inputs.to(device), labels.to(device)

            optimizer.zero_grad()
            y_pred = model(inputs)

            loss = criterion(y_pred, labels)
            loss.backward()
            optimizer.step()

            train_loss += loss.item()
            train_size += y_pred.size(0)
            train_loss_log.append(loss.data / y_pred.size(0))

            _, pred_classes = torch.max(y_pred, 1)
            train_acc += (pred_classes == labels).sum().item()
            train_acc_log.append(np.mean((pred_classes == labels).cpu().numpy()))

        # validation block
        val_loss = 0.
        val_size = 0
        val_acc = 0.

        model.eval()
        with torch.no_grad():
            for inputs, labels in test_dataloader:
                inputs, labels = inputs.to(device), labels.to(device)
                y_pred = model(inputs)
                loss = criterion(y_pred, labels)
                val_loss += loss.item()
                val_size += y_pred.size(0)

                _, pred_classes = torch.max(y_pred, 1)
                val_acc += (pred_classes == labels).sum().item()

        val_loss_log.append(val_loss / val_size)
        val_acc_log.append(val_acc / val_size)

        clear_output()
        plot_history(train_loss_log, val_loss_log, 'loss')
        plot_history(train_acc_log, val_acc_log, 'accuracy')

        print('Train loss:', train_loss / train_size)
        print('Train acc:', train_acc / train_size)
        print('Val loss:', val_loss / val_size)
        print('Val acc:', val_acc / val_size)

Then I train the model:

first_model = FirstModel()
first_model.to(device)

optimizer = optim.RMSprop(first_model.parameters(), lr=0.001, momentum=0.9)

train_model(first_model, optimizer, train_dataloader, test_dataloader)

The loss and accuracy do not change (accuracy stays at around 0.1). However, if the optimizer is SGD with momentum, everything works fine (loss and accuracy change). I've already tried changing the momentum and learning rate, but it does not help.

What should be fixed? I'd be grateful for any advice!

Sunny Duckling

3 Answers


Try decreasing the learning rate further. If that still has no effect on the accuracy and loss, change the optimizer to Adam or something else and experiment with different learning rates.
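
For example, a minimal sketch of that kind of swap, reusing the train_model and data loaders from the question (the choice of Adam and the lr value here are only illustrative, not prescriptions from this answer):

import torch.optim as optim

# Illustrative alternative: Adam with a smaller learning rate.
optimizer = optim.Adam(first_model.parameters(), lr=1e-4)
train_model(first_model, optimizer, train_dataloader, test_dataloader)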


First of all, you don't need the softmax in the model: nn.CrossEntropyLoss already applies log-softmax internally, so the model should return raw logits. I also suspect that RMSprop doesn't work well with momentum here.
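
A minimal sketch of what the corrected forward could look like, assuming the rest of FirstModel stays exactly as in the question; the only change is returning raw logits instead of softmax probabilities:

    def forward(self, x):
        x = self.vgg3(self.vgg2(self.vgg1(x)))
        x = nn.Flatten()(x)
        x = self.relu(self.fc1(x))
        x = self.dropout(x)
        x = self.relu(self.fc2(x))
        x = self.dropout(x)
        # Return raw logits; nn.CrossEntropyLoss applies log-softmax internally.
        return self.fc3(x)

If you need actual probabilities at inference time, apply torch.softmax(logits, dim=1) outside the loss computation. You can also try RMSprop without the momentum argument, e.g. optim.RMSprop(first_model.parameters(), lr=1e-4).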

Krueger

In my case, I was facing the same problem. On my laptop without a GPU the training was fine. When I tried it on a GPU, the model's accuracy and loss didn't change after the first epoch. I was using nn.CrossEntropyLoss() with Adam. Replacing Adam with SGD worked for me.
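
For reference, a minimal sketch of that optimizer swap on the model from the question (the lr and momentum values below are only illustrative):

import torch.optim as optim

# Replace Adam with plain SGD + momentum (values are illustrative).
optimizer = optim.SGD(first_model.parameters(), lr=0.01, momentum=0.9)
train_model(first_model, optimizer, train_dataloader, test_dataloader)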

M.Naveed Riaz