I have a CnnLstm model as follows.

    import torch
    import torch.nn as nn
    import torch.nn.functional as f
    from torchvision import datasets, transforms
    from torch_lr_finder import LRFinder

    class CNN(nn.Module):
        def __init__(self):
            super(CNN, self).__init__()
            self.conv1 = nn.Sequential(
                nn.Conv2d(
                    in_channels=3,
                    out_channels=16,
                    kernel_size=5,
                    stride=1,
                    padding=2,
                ),
                nn.ReLU(),
                nn.MaxPool2d(kernel_size=2),
            )
            self.conv2 = nn.Sequential(
                nn.Conv2d(16, 32, 5, 1, 2),
                nn.ReLU(),
                nn.MaxPool2d(kernel_size=2),
            )
            # 32 * 75 * 75 for this config; 64 * 37 * 37 or 128 * 18 * 18 for deeper variants
            self.out = nn.Linear(32 * 75 * 75, num_classes)

        def forward(self, x):
            x = self.conv1(x)
            x = self.conv2(x)
            x = x.view(x.size(0), -1)   # flatten to per-frame feature vectors
            output = self.out(x)
            return output, x            # return logits and the flattened features

    class CnnLstm(nn.Module):
        def __init__(self):
            super(CnnLstm, self).__init__()
            self.cnn = CNN()
            self.rnn = nn.LSTM(input_size=180000, hidden_size=256, num_layers=3, batch_first=True)  # stacked LSTM with 3 layers; input_size = 32 * 75 * 75
            self.linear = nn.Linear(256, num_classes)

        def forward(self, x):
            batch_size, time_steps, channels, height, width = x.size()
            c_in = x.view(batch_size * time_steps, channels, height, width)
            _, c_out = self.cnn(c_in)
            r_in = c_out.view(batch_size, time_steps, -1)
            r_out, (_, _) = self.rnn(r_in)
            r_out2 = self.linear(r_out[:, -1, :])
            return f.log_softmax(r_out2, dim=1)
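
For a 300x300 input, each MaxPool2d halves the spatial size (300 -> 150 -> 75), so the flattened per-frame feature size is 32 * 75 * 75 = 180000, which is why the LSTM's input_size is 180000. A quick sanity check (illustrative only; assumes num_classes is defined somewhere, e.g. num_classes = 10):

    dummy = torch.randn(8, 1, 3, 300, 300)   # (batch, time_steps, channels, height, width)
    model = CnnLstm()
    print(model(dummy).shape)                # torch.Size([8, num_classes])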

    import numpy as np
    import matplotlib.pyplot as plt
    from torch.autograd import Variable  # Variable is deprecated; plain tensors work in current PyTorch

    class TrainCNNLSTM:
        def __init__(self):
            self.seed = 1
            self.batch_size = 8
            self.validate_batch_size = 8
            self.test_batch_size = 1
            self.epoch = 50
            self.learning_rate = 0.005
            self.step = 100
            self.train_loader = None
            self.validate_loader = None
            self.test_loader = None
            self.modelloaded = False
            self.model = CnnLstm().to(device)  # device defined elsewhere, e.g. torch.device("cuda")
            self.criterion = nn.CrossEntropyLoss()  # note: the model outputs log_softmax, so nn.NLLLoss() would be the matching loss
            #self.optimizer = torch.optim.SGD(self.model.parameters(), lr=self.learning_rate)#self.learning_rate = 0.001
            self.optimizer = torch.optim.AdamW(self.model.parameters())
            #self.scheduler = optim.lr_scheduler.OneCycleLR(self.optimizer, 2e-3, epochs=self.epoch, steps_per_epoch=len(train_loader))

        def load_data(self):
            data_loader = DataLoader()  # custom loader class defined elsewhere
            self.train_loader = data_loader.get_train_data(self.batch_size)
            self.validate_loader = data_loader.get_validate_data(self.validate_batch_size)
            self.test_loader = data_loader.get_test_data(self.test_batch_size)

        def do_lrfinder(self):
            lr_finder = LRFinder(self.model, self.optimizer, self.criterion, device)
            lr_finder.range_test(self.train_loader, end_lr=1, num_iter=1000)
            lr_finder.plot()
            plt.savefig("LRvsLoss.png")
            plt.close()

        def train(self):
            for epoch in range(0, self.epoch):
                t_losses = []
                for iteration, (data, target) in enumerate(self.train_loader):
                    print(data.shape)                    # (8, 3, 300, 300)
                    data = np.expand_dims(data, axis=1) # add the time dimension
                    print(data.shape)                    # (8, 1, 3, 300, 300)
                    data = torch.FloatTensor(data)
                    data, target = data.cuda(), target.cuda()
                    data, target = Variable(data), Variable(target)
                    self.optimizer.zero_grad()
                    # ... (rest of the training step omitted)

Since it is a CnnLstm model, the input to the model has shape (batch_size, time_steps, channels, height, width), i.e. (8, 1, 3, 300, 300).

To use torch_lr_finder, we need to run the following code:

    lr_finder = LRFinder(self.model, self.optimizer, self.criterion, device)
    lr_finder.range_test(self.train_loader, end_lr=1, num_iter=1000)

However, self.train_loader yields batches of shape (8, 3, 300, 300), with no time dimension, so self.model can't consume them directly while finding the learning rate.
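
To illustrate the mismatch:

    data, target = next(iter(self.train_loader))
    print(data.shape)  # torch.Size([8, 3, 300, 300]) - 4-D, but CnnLstm.forward expects 5-D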

How can I use torch_lr_finder for such a model?


1 Answer

One possibility is that, instead of expanding the dims in the for loop, you could pass the tensor straight into the model's forward function and call .unsqueeze(1) there. Like this:

                print(data.shape)
                # data = np.expand_dims(data, axis=1)   <- this line removed
                print(data.shape)
                data = torch.FloatTensor(data)

That is, omit the expand_dims call in the loop; then, in your forward function, do this:

    x = x.unsqueeze(1)
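
In full, the modified CnnLstm.forward might look like this (a sketch, assuming the DataLoader yields 4-D batches of shape (batch, channels, height, width)):

    def forward(self, x):
        x = x.unsqueeze(1)  # add the time dimension -> (batch, 1, channels, height, width)
        batch_size, time_steps, channels, height, width = x.size()
        c_in = x.view(batch_size * time_steps, channels, height, width)
        _, c_out = self.cnn(c_in)
        r_in = c_out.view(batch_size, time_steps, -1)
        r_out, (_, _) = self.rnn(r_in)
        r_out2 = self.linear(r_out[:, -1, :])
        return f.log_softmax(r_out2, dim=1)

With that change, lr_finder.range_test can consume the loader's 4-D batches directly, and the same forward works for training once the np.expand_dims line is removed from the loop.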