Im building CNN based on this data:https://en.wikipedia.org/wiki/CIFAR-10 , which consists of 10 object classes with 60000 32-by-32 RGB images in total (50000 training, 10000 test).
what i want to do after building and training a CNN is to change the total number of filters (neurons) in the network to the extent of overfitting and underfitting. And to find the configuration resulting with the lowest test error and use it for the following sections.
here is my code :
import torch
import torchvision
import torchvision.transforms as transforms
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim`
import torch
import torchvision
import torchvision.transforms as transforms
transform = transforms.Compose(
[transforms.ToTensor(),
transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])
trainset = torchvision.datasets.CIFAR10(root='./data', train=True,
download=True, transform=transform)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=4,
shuffle=True, num_workers=2)
testset = torchvision.datasets.CIFAR10(root='./data', train=False,
download=True, transform=transform)
testloader = torch.utils.data.DataLoader(testset, batch_size=4,
shuffle=False, num_workers=2)`
import torch.nn as nn
import torch.nn.functional as F
class Net(nn.Module):
def __init__(self):
super(Net, self).__init__()
self.conv1 = nn.Conv2d(3, 6, 5)
self.pool = nn.MaxPool2d(2, 2)
self.conv2 = nn.Conv2d(6, 16, 5)
self.fc1 = nn.Linear(16 * 5 * 5, 120)
self.fc2 = nn.Linear(120, 84)
self.fc3 = nn.Linear(84, 10)
def forward(self, x):
x = self.pool(F.relu(self.conv1(x)))
x = self.pool(F.relu(self.conv2(x)))
x = x.view(-1, 16 * 5 * 5)
x = F.relu(self.fc1(x))
x = F.dropout(x, p=0.5)
x = F.relu(self.fc2(x))
x = F.dropout(x, p=0.5)
x = self.fc3(x)
return x
net = Net()
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9)
for epoch in range(2): # loop over the dataset multiple times
running_loss = 0.0
for i, data in enumerate(trainloader, 0):
# get the inputs; data is a list of [inputs, labels]
inputs, labels = data
# zero the parameter gradients
optimizer.zero_grad()
# forward + backward + optimize
outputs = net(inputs)
loss = criterion(outputs, labels)
loss.backward()
optimizer.step()
# print statistics
running_loss += loss.item()
if i % 2000 == 1999: # print every 2000 mini-batches
print(f'[{epoch + 1}, {i + 1:5d}] loss: {running_loss / 2000:.3f}')
running_loss = 0.0
print('Finished Training')
import matplotlib.pyplot as plt
def train_and_evaluate(net, trainloader, testloader, optimizer, criterion, num_epochs):
train_losses = []
test_losses = []
best_test_loss = float('inf')
best_net = None
for epoch in range(num_epochs):
running_loss = 0.0
net.train()
for i, data in enumerate(trainloader, 0):
inputs, labels = data
optimizer.zero_grad()
outputs = net(inputs)
loss = criterion(outputs, labels)
loss.backward()
optimizer.step()
running_loss += loss.item()
train_loss = running_loss / len(trainloader)
train_losses.append(train_loss)
net.eval()
with torch.no_grad():
running_loss = 0.0
for i, data in enumerate(testloader, 0):
inputs, labels = data
outputs = net(inputs)
loss = criterion(outputs, labels)
running_loss += loss.item()
test_loss = running_loss / len(testloader)
test_losses.append(test_loss)
if test_loss < best_test_loss:
best_test_loss = test_loss
#best_net = copy.deepcopy(net)
return train_losses, test_losses
def plot_losses(train_losses, test_losses):
plt.plot(train_losses, label='Train')
plt.plot(test_losses, label='Test')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.legend()
plt.show()
net = Net()
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9)
num_epochs = 10
train_losses, test_losses = train_and_evaluate(net, trainloader, testloader, optimizer, criterion, num_epochs)
plot_losses(train_losses, test_losses)
print(f'Total number of learnable parameters: {sum(p.numel() for p in net.parameters() if p.requires_grad)}')
num_filters_list = [8, 16, 32, 64, 128, 256, 512]
best_test_loss = float('inf')
best_num_filters = None
for num_filters in num_filters_list:
net = Net()
net.conv1 = nn.Conv2d(3, num_filters, 5)
net.conv2 = nn.Conv2d(num_filters, num_filters*2, 5)
optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9)
num_epochs = 10
train_losses, test_losses = train_and_evaluate(net, trainloader, testloader, optimizer, criterion, num_epochs)
plot_losses(train_losses, test_losses)
print(f'Total number of filters: {num_filters}, Total number of learnable parameters: {sum(p.numel() for p in net.parameters() if p.requires_grad)}')
if test_losses[-1] < best_test_loss:
best_test_loss = test_losses[-1]
best_num_filters = num_filters
print("Best total number of filters:")
print(best_num_filters)
print("best test loss:")
print(best_test_loss)
#print(f'Best total number of filters: {best_num_filters}, Test Loss
Unforunattly i get the following error :
ValueError Traceback (most recent call last)
<ipython-input-14-cb9b92f17413> in <module>
72 optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9)
73 num_epochs = 10
---> 74 train_losses, test_losses = train_and_evaluate(net, trainloader, testloader, optimizer, criterion, num_epochs)
75 plot_losses(train_losses, test_losses)
76 print(f'Total number of filters: {num_filters}, Total number of learnable parameters: {sum(p.numel() for p in net.parameters() if p.requires_grad)}')
3 frames
/usr/local/lib/python3.9/dist-packages/torch/nn/functional.py in cross_entropy(input, target, weight, size_average, ignore_index, reduce, reduction, label_smoothing)
3024 if size_average is not None or reduce is not None:
3025 reduction = _Reduction.legacy_get_string(size_average, reduce)
-> 3026 return torch._C._nn.cross_entropy_loss(input, target, weight, _Reduction.get_enum(reduction), ignore_index, label_smoothing)
3027
3028
ValueError: Expected input batch_size (8) to match target batch_size (4)
Im guessing the problem is with the dimensions but i cant seem to understand where ? and how to fix the problem if it is the dimentions . Thank you