I am very perplexed why I am having a shape error after verifying the output shapes of layers. Can anyone please help identify where I am going wrong?
Based upon the summary of layers I included it seems like the error is occurring between layer 6 and 7. But the output of layer 6 shows the same dimension as the input for layer 7. It should be noted that the error dimension 6272 corresponds with the output of layer 3/4.
I'm receiving this error:
Traceback (most recent call last):
File "C:\Users\logan\Spyder_ProjectCode.py", line 215, in <module>
training_loss[t] = train_loop(trainloader, model, loss_fn, opt)/len(trainloader)
File "C:\Users\logan\Spyder_ProjectCode.py", line 175, in train_loop
pred = model(X)
File "C:\Users\logan\anaconda3\lib\site-packages\torch\nn\modules\module.py", line 1110, in _call_impl
return forward_call(*input, **kwargs)
File "C:\Users\logan\anaconda3\lib\site-packages\torch\nn\modules\container.py", line 141, in forward
input = module(input)
File "C:\Users\logan\anaconda3\lib\site-packages\torch\nn\modules\module.py", line 1110, in _call_impl
return forward_call(*input, **kwargs)
File "C:\Users\logan\anaconda3\lib\site-packages\torch\nn\modules\linear.py", line 103, in forward
return F.linear(input, self.weight, self.bias)
RuntimeError: mat1 and mat2 shapes cannot be multiplied (8x6272 and 1152x512)
#Summary of layers
Conv2d-1 [-1, 64, 15, 15] 1,792
ReLU-2 [-1, 64, 15, 15] 0
Conv2d-3 [-1, 128, 7, 7] 73,856
ReLU-4 [-1, 128, 7, 7] 0
MaxPool2d-5 [-1, 128, 3, 3] 0
Flatten-6 [-1, 1152] 0
Linear-7 [-1, 512] 590,336
ReLU-8 [-1, 512] 0
Linear-9 [-1, 340] 174,420
ReLU-10 [-1, 340] 0
Linear-11 [-1, 47] 16,027
================================================================
Here is my code:
model = nn.Sequential(
Conv2d(3, 64, kernel_size=3, stride=2),
ReLU(),
Conv2d(64, 128, kernel_size=3, stride=2),
ReLU(),
MaxPool2d((2,2), stride=(2,2)),
Flatten(),
Linear(3*3*128, 512),
ReLU(),
Linear(512, 340),
ReLU(),
Linear(340, 47)
)
loss_fn = nn.CrossEntropyLoss()
learning_rate = 0.1
epochs = 15
momen = 0.8
model = model.to(device) #choose one or the other
opt = optim.SGD(model.parameters(), lr=learning_rate, momentum=momen)
def train_loop(dataloader, model, loss_fn, optimizer):
size = len(dataloader.dataset)
training_loss = 0
model.train()
for batch, (X, y) in enumerate(dataloader):
X, y = X.to(device), y.to(device)
pred = model(X)
loss = loss_fn(pred, y)
opt.zero_grad()
loss.backward()
opt.step()
training_loss += loss.item()
return training_loss
training_loss = np.zeros(epochs)
for t in range(epochs):
print(f"Epoch {t+1}\\n-------------------------------")
training_loss\[t\] = train_loop(trainloader, model, loss_fn, opt)/len(trainloader)
print("Done!")