
I have an MLP classifier with three hidden layers (fc1, fc2, fc3) and an output layer (outputLayer). I want to transfer the trained model parameters but add two new layers starting from fc3.

So I created a TransferMLP class. In it, I just picked the fc3 layer and added a new fc4, but I am getting this error: RuntimeError: mat1 and mat2 shapes cannot be multiplied (3015x1 and 30x15)

I suspect the TransferMLP class is picking up the outputLayer weights instead, but I don't know how to fix this.

Please help me.

I just don't know how to truncate the pretrained model so that it stops at fc3.

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import torch
import torch.nn as nn
import torch.optim as optim
from torch.autograd import Variable


# Original MLP classifier
class MLP(nn.Module):
    def __init__(self, input_dim, hidden_dim1, hidden_dim2, hidden_dim3, output_dim, device):
        super(MLP, self).__init__()
        self.device = device
        self.fc1 = nn.Linear(input_dim, hidden_dim1)
        self.relu1 = nn.ReLU()
        self.drop1 = nn.Dropout(0.5)
        self.fc2 = nn.Linear(hidden_dim1, hidden_dim2)
        self.relu2 = nn.ReLU()
        self.drop2 = nn.Dropout(0.5)
        self.fc3 = nn.Linear(hidden_dim2, hidden_dim3)
        self.relu3 = nn.ReLU()
        self.drop3 = nn.Dropout(0.5)
        self.outputLayer = nn.Linear(hidden_dim3, output_dim)
        self.out_act = nn.Sigmoid()

    def forward(self, x):
        out = self.fc1(x)
        out = self.relu1(out)
        out = self.drop1(out)
        out = self.fc2(out)
        out = self.relu2(out)
        out = self.drop2(out)
        out = self.fc3(out)
        out = self.relu3(out)
        out = self.drop3(out)
        out = self.outputLayer(out)
        out = self.out_act(out)
        return out


# Transfer learning model
class TransferMLP(nn.Module):
    def __init__(self, pretrained_model, hidden_dim3, hidden_dim4, output_dim, device): #hidden_dim1, hidden_dim2, 
        super(TransferMLP, self).__init__()
        self.device = device
        self.pretrained_model = pretrained_model
        self.fc3 = nn.Linear(pretrained_model.fc3.in_features, hidden_dim3)
        self.relu3 = nn.ReLU()
        self.drop3 = nn.Dropout(0.5)
        self.fc4 = nn.Linear(hidden_dim3, hidden_dim4)
        self.relu4 = nn.ReLU()
        self.drop4 = nn.Dropout(0.5)
        self.outputLayer = nn.Linear(hidden_dim4, output_dim)
        self.out_act = nn.Sigmoid()

    def forward(self, x):
        out = self.pretrained_model(x)
        out = self.fc3(out)
        out = self.relu3(out)
        out = self.drop3(out)
        out = self.fc4(out)
        out = self.relu4(out)
        out = self.drop4(out)
        out = self.outputLayer(out)
        out = self.out_act(out)
        return out


def train(input_dim, hidden_dim1, hidden_dim2, hidden_dim3, output_dim, learning_rate, num_epochs, trainData, trainLabel,
          device):
    dnn_model = MLP(input_dim, hidden_dim1, hidden_dim2, hidden_dim3, output_dim, device=device)
    dnn_model.to(device)

    # Loss and optimizer
    criterion = nn.BCELoss()
    optimizer = optim.SGD(dnn_model.parameters(), lr=learning_rate)

    trainX_batch = Variable(trainData.float()).to(device)
    trainY_batch = Variable(trainLabel.float()).to(device)

    # Train the network
    for epoch in range(num_epochs):
        outputs = dnn_model(trainX_batch)
        tot_loss = criterion(outputs, trainY_batch)

        optimizer.zero_grad()
        tot_loss.backward()
        optimizer.step()

    return dnn_model


def transferMLP_train(pretrained_model, hidden_dim3, hidden_dim4, output_dim, learning_rate, num_epochs, trainData,
                   trainLabel, device):
    model = TransferMLP(pretrained_model, hidden_dim3, hidden_dim4, output_dim, device=device)
    model.to(device)

    # Freeze pre-trained model parameters
    for param in model.pretrained_model.parameters():
        param.requires_grad = False
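    # Note: the frozen parameters receive no gradients, so passing all of
    # model.parameters() to SGD below is harmless; optionally pass only the
    # trainable ones: filter(lambda p: p.requires_grad, model.parameters())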

    # Loss and optimizer
    criterion = nn.BCELoss()
    optimizer = optim.SGD(model.parameters(), lr=learning_rate)

    trainX_batch = Variable(trainData.float()).to(device)
    trainY_batch = Variable(trainLabel.float()).to(device)

    # Train the network
    for epoch in range(num_epochs):
        outputs = model(trainX_batch)
        tot_loss = criterion(outputs, trainY_batch)

        optimizer.zero_grad()
        tot_loss.backward()
        optimizer.step()

    return model
real_input_dim :  103
real_input_label_dim:  torch.Size([3015, 1])
input_dim:  103
input_label_dim:  torch.Size([59264, 1])
MLP(
  (fc1): Linear(in_features=103, out_features=60, bias=True)
  (relu1): ReLU()
  (drop1): Dropout(p=0.5, inplace=False)
  (fc2): Linear(in_features=60, out_features=30, bias=True)
  (relu2): ReLU()
  (drop2): Dropout(p=0.5, inplace=False)
  (fc3): Linear(in_features=30, out_features=15, bias=True)
  (relu3): ReLU()
  (drop3): Dropout(p=0.5, inplace=False)
  (outputLayer): Linear(in_features=15, out_features=1, bias=True)
  (out_act): Sigmoid()
)
TransferMLP(
  (pretrained_model): MLP(
    (fc1): Linear(in_features=103, out_features=60, bias=True)
    (relu1): ReLU()
    (drop1): Dropout(p=0.5, inplace=False)
    (fc2): Linear(in_features=60, out_features=30, bias=True)
    (relu2): ReLU()
    (drop2): Dropout(p=0.5, inplace=False)
    (fc3): Linear(in_features=30, out_features=15, bias=True)
    (relu3): ReLU()
    (drop3): Dropout(p=0.5, inplace=False)
    (outputLayer): Linear(in_features=15, out_features=1, bias=True)
    (out_act): Sigmoid()
  )
  (fc3): Linear(in_features=30, out_features=15, bias=True)
  (relu3): ReLU()
  (drop3): Dropout(p=0.5, inplace=False)
  (fc4): Linear(in_features=15, out_features=5, bias=True)
  (relu4): ReLU()
  (drop4): Dropout(p=0.5, inplace=False)
  (outputLayer): Linear(in_features=5, out_features=1, bias=True)
  (out_act): Sigmoid()
)
---------------------------------------------------------------------------
RuntimeError                              Traceback (most recent call last)
Cell In[9], line 90
     88 t_mlp = TransferMLP(mlp, hidden_dim3, hidden_dim4, output_dim, device)
     89 print(t_mlp)
---> 90 final_mlp = transferMLP_train(mlp, hidden_dim3, hidden_dim4, output_dim, learning_rate, 
     91                               num_epochs, real_trainData, real_trainLabel, device)
     93 # Test the model
     94 with torch.no_grad():

File /RAID5/DataStorage/davidd/experiments/pb1_july/models/classifer_transfer.py:113, in transferMLP_train(pretrained_model, hidden_dim3, hidden_dim4, output_dim, learning_rate, num_epochs, trainData, trainLabel, device)
    111 # Train the network
    112 for epoch in range(num_epochs):
--> 113     outputs = model(trainX_batch)
    114     tot_loss = criterion(outputs, trainY_batch)
    116     optimizer.zero_grad()

File /RAID5/DataStorage/davidd/apps/anaconda/envs/pt2.0.1/lib/python3.10/site-packages/torch/nn/modules/module.py:1194, in Module._call_impl(self, *input, **kwargs)
   1190 # If we don't have any hooks, we want to skip the rest of the logic in
   1191 # this function, and just call forward.
   1192 if not (self._backward_hooks or self._forward_hooks or self._forward_pre_hooks or _global_backward_hooks
   1193         or _global_forward_hooks or _global_forward_pre_hooks):
-> 1194     return forward_call(*input, **kwargs)
   1195 # Do not call functions when jit is used
   1196 full_backward_hooks, non_full_backward_hooks = [], []

File /RAID5/DataStorage/davidd/experiments/pb1_july/models/classifer_transfer.py:60, in TransferMLP.forward(self, x)
     58 def forward(self, x):
     59     out = self.pretrained_model(x)
---> 60     out = self.fc3(out)
     61     out = self.relu3(out)
     62     out = self.drop3(out)

File /RAID5/DataStorage/davidd/apps/anaconda/envs/pt2.0.1/lib/python3.10/site-packages/torch/nn/modules/module.py:1194, in Module._call_impl(self, *input, **kwargs)
   1190 # If we don't have any hooks, we want to skip the rest of the logic in
   1191 # this function, and just call forward.
   1192 if not (self._backward_hooks or self._forward_hooks or self._forward_pre_hooks or _global_backward_hooks
   1193         or _global_forward_hooks or _global_forward_pre_hooks):
-> 1194     return forward_call(*input, **kwargs)
   1195 # Do not call functions when jit is used
   1196 full_backward_hooks, non_full_backward_hooks = [], []

File /RAID5/DataStorage/davidd/apps/anaconda/envs/pt2.0.1/lib/python3.10/site-packages/torch/nn/modules/linear.py:114, in Linear.forward(self, input)
    113 def forward(self, input: Tensor) -> Tensor:
--> 114     return F.linear(input, self.weight, self.bias)

RuntimeError: mat1 and mat2 shapes cannot be multiplied (3015x1 and 30x15)
  • I googled "pytorch get output from intermediate layer" and after some clicking found the relevant ptrblck post: https://discuss.pytorch.org/t/how-can-l-load-my-best-model-as-a-feature-extractor-evaluator/17254/6 – Sandro Jul 12 '23 at 07:46
  • @Sandro, thank you for your response. I looked at it, but it doesn't quite relate to my problem. I just want to take fc1 ... fc3 and then add one more layer, fc4, before my final outputLayer. – ddq Jul 12 '23 at 08:05
  • The code you show doesn't work because `self.pretrained_model(x)` returns the output through all layers (including the output layer) of the pretrained model. I think what you want is to get an intermediate layer of your pretrained model. – Sandro Jul 12 '23 at 08:14
  • Yes, you are right. I just need to stop at fc3, add one more layer fc4, and then bring back the output layer. How can I modify that part? – ddq Jul 12 '23 at 08:27
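Following the diagnosis in the comments, below is a minimal sketch of one way to do what the last comment asks: stop at fc3, add fc4, then a new output layer. It reuses the question's class and argument names and assumes you want to keep the pretrained fc1–fc3 weights and only learn fc4 and the new outputLayer; treat it as an untested illustration, not a verified fix.

class TransferMLP(nn.Module):
    def __init__(self, pretrained_model, hidden_dim4, output_dim, device):
        super(TransferMLP, self).__init__()
        self.device = device
        self.pretrained_model = pretrained_model
        # fc4 consumes the 15 features produced by the pretrained fc3,
        # not the 1-dim sigmoid output of the whole pretrained model
        self.fc4 = nn.Linear(pretrained_model.fc3.out_features, hidden_dim4)
        self.relu4 = nn.ReLU()
        self.drop4 = nn.Dropout(0.5)
        self.outputLayer = nn.Linear(hidden_dim4, output_dim)
        self.out_act = nn.Sigmoid()

    def forward(self, x):
        # run the pretrained layers only up to fc3/relu3/drop3,
        # skipping the pretrained outputLayer and Sigmoid
        p = self.pretrained_model
        out = p.drop1(p.relu1(p.fc1(x)))
        out = p.drop2(p.relu2(p.fc2(out)))
        out = p.drop3(p.relu3(p.fc3(out)))
        # new layers stacked on top of the truncated backbone
        out = self.drop4(self.relu4(self.fc4(out)))
        out = self.outputLayer(out)
        out = self.out_act(out)
        return out

With this version the hidden_dim3 argument is no longer needed (the pretrained fc3 is reused as-is), so transferMLP_train and its call site would drop it. An equivalent alternative is to build a feature extractor from the pretrained children, e.g. nn.Sequential(*list(pretrained_model.children())[:-2]), which keeps everything up to drop3, in the spirit of the linked discuss.pytorch.org post. Either way the 3015x1 vs 30x15 mismatch goes away because the first new layer now receives the 15 features from the pretrained fc3 instead of the single output of the full pretrained model.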
