
I'm building a CNN based on this dataset: https://en.wikipedia.org/wiki/CIFAR-10 , which consists of 10 object classes with 60,000 32-by-32 RGB images in total (50,000 training, 10,000 test).

After building and training the CNN, I want to vary the total number of filters (neurons) in the network far enough to produce both overfitting and underfitting, find the configuration with the lowest test error, and use that configuration for the following sections.

Here is my code:

import torch
import torchvision
import torchvision.transforms as transforms
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

   
transform = transforms.Compose(
    [transforms.ToTensor(),
     transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])

trainset = torchvision.datasets.CIFAR10(root='./data', train=True,
                                        download=True, transform=transform)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=4,
                                          shuffle=True, num_workers=2)

testset = torchvision.datasets.CIFAR10(root='./data', train=False,
                                       download=True, transform=transform)
testloader = torch.utils.data.DataLoader(testset, batch_size=4,
                                         shuffle=False, num_workers=2)


class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(3, 6, 5)
        self.pool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(6, 16, 5)
        self.fc1 = nn.Linear(16 * 5 * 5, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)

    def forward(self, x):
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = x.view(-1, 16 * 5 * 5)
        x = F.relu(self.fc1(x))
        x = F.dropout(x, p=0.5)
        x = F.relu(self.fc2(x))
        x = F.dropout(x, p=0.5)
        x = self.fc3(x)
        return x

net = Net()

criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9)



for epoch in range(2):  # loop over the dataset multiple times

    running_loss = 0.0
    for i, data in enumerate(trainloader, 0):
        # get the inputs; data is a list of [inputs, labels]
        inputs, labels = data

        # zero the parameter gradients
        optimizer.zero_grad()

        # forward + backward + optimize
        outputs = net(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # print statistics
        running_loss += loss.item()
        if i % 2000 == 1999:    # print every 2000 mini-batches
            print(f'[{epoch + 1}, {i + 1:5d}] loss: {running_loss / 2000:.3f}')
            running_loss = 0.0

print('Finished Training')
import matplotlib.pyplot as plt


def train_and_evaluate(net, trainloader, testloader, optimizer, criterion, num_epochs):
    train_losses = []
    test_losses = []
    best_test_loss = float('inf')
    best_net = None
    
    for epoch in range(num_epochs):
        running_loss = 0.0
        net.train()
        for i, data in enumerate(trainloader, 0):
            inputs, labels = data
            optimizer.zero_grad()
            outputs = net(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            running_loss += loss.item()
        
        train_loss = running_loss / len(trainloader)
        train_losses.append(train_loss)
        
        net.eval()
        with torch.no_grad():
            running_loss = 0.0
            for i, data in enumerate(testloader, 0):
                inputs, labels = data
                outputs = net(inputs)
                loss = criterion(outputs, labels)
                running_loss += loss.item()
        
        test_loss = running_loss / len(testloader)
        test_losses.append(test_loss)
        
        if test_loss < best_test_loss:
            best_test_loss = test_loss
            #best_net = copy.deepcopy(net)
    
    return train_losses, test_losses



   
def plot_losses(train_losses, test_losses):
    plt.plot(train_losses, label='Train')
    plt.plot(test_losses, label='Test')
    plt.xlabel('Epoch')
    plt.ylabel('Loss')
    plt.legend()
    plt.show()


net = Net()
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9)
num_epochs = 10
train_losses, test_losses = train_and_evaluate(net, trainloader, testloader, optimizer, criterion, num_epochs)
plot_losses(train_losses, test_losses)
print(f'Total number of learnable parameters: {sum(p.numel() for p in net.parameters() if p.requires_grad)}')


num_filters_list = [8, 16, 32, 64, 128, 256, 512]
best_test_loss = float('inf')
best_num_filters = None
for num_filters in num_filters_list:
    net = Net()
    net.conv1 = nn.Conv2d(3, num_filters, 5)
    net.conv2 = nn.Conv2d(num_filters, num_filters*2, 5)
    optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9)
    num_epochs = 10
    train_losses, test_losses = train_and_evaluate(net, trainloader, testloader, optimizer, criterion, num_epochs)
    plot_losses(train_losses, test_losses)
    print(f'Total number of filters: {num_filters}, Total number of learnable parameters: {sum(p.numel() for p in net.parameters() if p.requires_grad)}')
    if test_losses[-1] < best_test_loss:
        best_test_loss = test_losses[-1]
        best_num_filters = num_filters
print("Best total number of filters:")
print(best_num_filters)
print("best test loss:")
print(best_test_loss)
#print(f'Best total number of filters: {best_num_filters}, Test Loss

Unfortunately I get the following error:


ValueError                                Traceback (most recent call last)
<ipython-input-14-cb9b92f17413> in <module>
     72     optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9)
     73     num_epochs = 10
---> 74     train_losses, test_losses = train_and_evaluate(net, trainloader, testloader, optimizer, criterion, num_epochs)
     75     plot_losses(train_losses, test_losses)
     76     print(f'Total number of filters: {num_filters}, Total number of learnable parameters: {sum(p.numel() for p in net.parameters() if p.requires_grad)}')

3 frames
/usr/local/lib/python3.9/dist-packages/torch/nn/functional.py in cross_entropy(input, target, weight, size_average, ignore_index, reduce, reduction, label_smoothing)
   3024     if size_average is not None or reduce is not None:
   3025         reduction = _Reduction.legacy_get_string(size_average, reduce)
-> 3026     return torch._C._nn.cross_entropy_loss(input, target, weight, _Reduction.get_enum(reduction), ignore_index, label_smoothing)
   3027 
   3028 

ValueError: Expected input batch_size (8) to match target batch_size (4)

I'm guessing the problem is with the dimensions, but I can't seem to understand where, or how to fix it if it is the dimensions. Thank you.

1 Answer


I believe your input shape is being changed by the line x = x.view(-1, 16 * 5 * 5) in the forward function of your network when you test with different filter counts. To see how the batch size is affected (per the error message), try printing the shape of the tensor right before and after the view statement:

print(x.size())
x = x.view(-1, 16 * 5 * 5)
print(x.size())

That is where the batch sizes diverge: 16 * 5 * 5 is only the correct reshaping dimension for the original 16-channel conv2. In your loop conv2 outputs num_filters * 2 channels, so with num_filters = 16 the tensor entering view has shape [4, 32, 5, 5]; flattening its 3,200 elements into rows of 400 produces shape [8, 400], and cross_entropy then sees an input batch of 8 against 4 targets, exactly as the error reports. (The first configuration, num_filters = 8, happens to work because conv2 then outputs exactly 16 channels.)
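
One way to fix it is to keep the batch dimension fixed and infer the feature dimension instead, with x = x.view(x.size(0), -1), and to size fc1 from the filter count. Here is a minimal sketch of that idea (the num_filters constructor argument is my addition, not part of your original code):

import torch.nn as nn
import torch.nn.functional as F

class Net(nn.Module):
    def __init__(self, num_filters=6):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(3, num_filters, 5)
        self.pool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(num_filters, num_filters * 2, 5)
        # On a 32x32 input, two 5x5 convs and two 2x2 poolings leave a
        # (num_filters * 2) x 5 x 5 feature map, so fc1 must scale with
        # the filter count rather than being hard-coded to 16 * 5 * 5.
        self.fc1 = nn.Linear(num_filters * 2 * 5 * 5, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)

    def forward(self, x):
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        # Keep the batch dimension and infer the rest, instead of
        # hard-coding the feature size with -1 on the batch axis.
        x = x.view(x.size(0), -1)
        x = F.relu(self.fc1(x))
        # training=self.training turns dropout off under net.eval();
        # the functional form otherwise applies it at test time too.
        x = F.dropout(x, p=0.5, training=self.training)
        x = F.relu(self.fc2(x))
        x = F.dropout(x, p=0.5, training=self.training)
        return self.fc3(x)

Your sweep loop can then build each configuration with net = Net(num_filters) instead of swapping conv1 and conv2 into an already-constructed network, which keeps the fully connected layers consistent with the conv output.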