I am training a model for minutiae point detection on a fingerprint dataset. The inputs are slap fingerprint images rescaled to 128x128, and the ground truth is a 12-channel minutiae map: each channel corresponds to one bin of minutiae orientation, with dots marked at the (x, y) locations of the minutiae that fall in that bin. The input and GT channels 3 and 8 are shown as examples.
The input images are normalized to [-1, 1] because the model's final activation is tanh.
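To make the target format concrete, here is a minimal sketch of how such a 12-channel map can be built, assuming each minutia is given as (x, y, theta) with theta in [0, 2*pi); the actual binning and dot rendering in my pipeline may differ (e.g. dots may be dilated rather than single pixels):

import numpy as np

def build_minutiae_map(minutiae, size=128, n_bins=12):
    # Rasterize minutiae into an (n_bins, size, size) target: each point is
    # marked in the channel corresponding to its orientation bin.
    gt = np.zeros((n_bins, size, size), dtype=np.float32)
    for x, y, theta in minutiae:
        b = int(theta / (2 * np.pi) * n_bins) % n_bins  # orientation bin 0..11
        iy = min(size - 1, max(0, int(round(y))))
        ix = min(size - 1, max(0, int(round(x))))
        gt[b, iy, ix] = 1.0
    return gt

# Inputs, assuming 8-bit pixel values, are scaled to [-1, 1]:
# img = img.astype(np.float32) / 127.5 - 1.0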
The problem is that the intermediate prediction outputs I get (shown below) are incomprehensible.
Here's my simple autoencoder model:

import torch
import torch.nn as nn
import torch.nn.functional as F

class AEC(nn.Module):
    def __init__(self, in_ch=3, out_ch=12):
        super(AEC, self).__init__()
        print('in_ch=', in_ch)
        print('out_ch=', out_ch)
        # Encoder: each 2x2 stride-2 conv halves the spatial size
        # (128 -> 64 -> 32 -> 16 -> 8)
        self.conv1 = nn.Conv2d(in_ch, 16, 2, stride=2)
        self.conv2 = nn.Conv2d(16, 32, 2, stride=2)
        self.conv3 = nn.Conv2d(32, 64, 2, stride=2)
        self.conv4 = nn.Conv2d(64, 128, 2, stride=2)
        # Decoder: each 2x2 stride-2 transposed conv doubles it back
        # (8 -> 16 -> 32 -> 64 -> 128)
        self.t_conv1 = nn.ConvTranspose2d(128, 64, 2, stride=2)
        self.t_conv2 = nn.ConvTranspose2d(64, 32, 2, stride=2)
        self.t_conv3 = nn.ConvTranspose2d(32, 16, 2, stride=2)
        self.t_conv4 = nn.ConvTranspose2d(16, out_ch, 2, stride=2)

    def forward(self, x):
        x = F.relu(self.conv1(x))
        x = F.relu(self.conv2(x))
        x = F.relu(self.conv3(x))
        x = F.relu(self.conv4(x))
        x = F.relu(self.t_conv1(x))
        x = F.relu(self.t_conv2(x))
        x = F.relu(self.t_conv3(x))
        x = torch.tanh(self.t_conv4(x))  # F.tanh is deprecated; torch.tanh is equivalent
        return x
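As a quick sanity check on the wiring: the encoder halves the spatial size 128 -> 64 -> 32 -> 16 -> 8 and the decoder mirrors it back, so a 128x128 input yields a 12-channel 128x128 output:

net = AEC(in_ch=3, out_ch=12)
out = net(torch.randn(1, 3, 128, 128))
print(out.shape)  # torch.Size([1, 12, 128, 128]), values in [-1, 1] from the tanh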
Code for training and for logging the intermediate outputs to TensorBoard:
from tqdm import tqdm
from torch.autograd import Variable
from torchvision.utils import make_grid

for i, data in tqdm(enumerate(train_loader), total=len(train_loader)):
    ite_num = ite_num + 1
    ite_num4val = ite_num4val + 1
    inputs, minmap = data['image'], data['minmap']
    inputs = inputs.type(torch.FloatTensor)
    minmap = minmap.type(torch.FloatTensor)

    # Move to GPU if available (Variable is a no-op in PyTorch >= 0.4)
    if torch.cuda.is_available():
        inputs_v, minmap = Variable(inputs.cuda(), requires_grad=False), Variable(minmap.cuda(), requires_grad=False)
    else:
        inputs_v, minmap = Variable(inputs, requires_grad=False), Variable(minmap, requires_grad=False)

    optimizer_min.zero_grad()
    d0_min = min_net(inputs_v)

    # Calculate the loss and update the weights
    min_each_loss = l2_loss_fusion(d0_min, minmap)
    min_each_loss.backward()
    optimizer_min.step()
    running_loss_min += min_each_loss.data.item()

    if ite_num % save_tensorboard == 0:
        writer.add_scalars('Loss_Statistics', {'Loss_total_min': min_each_loss.data},
                           global_step=ite_num)
        grid_image = make_grid(inputs_v[0].clone().cpu().data, 1, normalize=True)
        writer.add_image('input', grid_image, ite_num)
        # Log prediction and ground-truth channels 3 and 8: squash through
        # sigmoid, then min-max normalize to [0, 1] for display
        for tag, tensor in (('Pred_layer_min', d0_min), ('Ori_layer_min', minmap)):
            for ch in (3, 8):
                res = tensor[0].clone().sigmoid().data.cpu().numpy()[ch, :, :]
                res = (res - res.min()) / (res.max() - res.min() + 1e-8)
                writer.add_image('%s_channel%d' % (tag, ch), torch.tensor(res),
                                 ite_num, dataformats='HW')

    del d0_min, min_each_loss

    if ite_num % save_frq == 0:
        torch.save(min_net.state_dict(),
                   model_dir + model_name + "_min_bce_itr_%d_train_tar_%3f.pth"
                   % (ite_num, running_loss_min / ite_num4val))
        running_loss_min = 0.0
        min_net.train()  # resume training mode
        ite_num4val = 0
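l2_loss_fusion isn't shown above; a minimal stand-in, assuming it reduces to a plain per-pixel MSE over all 12 channels (the actual wrapper may differ):

import torch.nn as nn

mse = nn.MSELoss()

def l2_loss_fusion(pred, target):
    # mean squared error over all pixels of all 12 channels
    return mse(pred, target)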
My model's loss curve after 8k iterations in epoch 1 is shown below.
Please help me figure out where I am going wrong. Is there something wrong with how I am visualizing the outputs in TensorBoard?