Could not find out why the mat1 from the convolutional network is 128x4 and not 4x128. The following is the convolutional network used:
model = torch.nn.Sequential(
torch.nn.Conv2d(2,32,kernel_size=3,padding=1),
torch.nn.ReLU(),
torch.nn.MaxPool2d(2,2),
torch.nn.Conv2d(32,64,kernel_size=3,padding=1),
torch.nn.ReLU(),
torch.nn.MaxPool2d(2,2),
torch.nn.Conv2d(64,128,kernel_size=3,padding=1),
torch.nn.ReLU(),
torch.nn.MaxPool2d(2,2,padding=1),
torch.nn.Flatten(),
torch.nn.Linear(128, 64),
torch.nn.ReLU(),
torch.nn.Linear(64,4)
)
The model training code is as follows:
epochs = 1000
losses = [] #A
for i in range(epochs): #B
game = Gridworld(size=size, mode='static') #C
# state_ = game.board.render_np().reshape(1,l1) + np.random.rand(1,l1)/10.0 #D
state_ = game.board.render_np() + np.random.rand(size,size)/10.0 #D
state1 = torch.from_numpy(state_).float() #E
print(state1.shape)
status = 1 #F
while(status == 1): #G
qval = model(state1) #H
qval_ = qval.data.numpy()
if (random.random() < epsilon): #I
action_ = np.random.randint(0,4)
else:
action_ = np.argmax(qval_)
action = action_set[action_] #J
game.makeMove(action) #K
state2_ = game.board.render_np().reshape(1,l1) + np.random.rand(1,l1)/10.0
state2 = torch.from_numpy(state2_).float() #L
reward = game.reward()
with torch.no_grad():
newQ = model(state2.reshape(1,l1))
maxQ = torch.max(newQ) #M
if reward == -1: #N
Y = reward + (gamma * maxQ)
else:
Y = reward
Y = torch.Tensor([Y]).detach()
X = qval.squeeze()[action_] #O
loss = loss_fn(X, Y) #P
print(i, loss.item())
clear_output(wait=True)
optimizer.zero_grad()
loss.backward()
losses.append(loss.item())
optimizer.step()
state1 = state2
if reward != -1: #Q
status = 0
if epsilon > 0.1: #R
epsilon -= (1/epochs)
The error log shown is:
torch.Size([2, 12, 12])
---------------------------------------------------------------------------
RuntimeError Traceback (most recent call last)
<ipython-input-22-d2f43f09fd01> in <module>()
74 status = 1 #F
75 while(status == 1): #G
---> 76 qval = model(state1) #H
77 qval_ = qval.data.numpy()
78 if (random.random() < epsilon): #I
3 frames
/usr/local/lib/python3.7/dist-packages/torch/nn/modules/linear.py in forward(self, input)
101
102 def forward(self, input: Tensor) -> Tensor:
--> 103 return F.linear(input, self.weight, self.bias)
104
105 def extra_repr(self) -> str:
RuntimeError: mat1 and mat2 shapes cannot be multiplied (128x4 and 128x64)
mat1 should be the output of the convolutional network after it is flattened, and mat2 is the linear network following it. Appreciate any help. Thanks!