
Can someone help me with this, please? I'm trying to figure out how to calculate the accuracy of my keypoint detection model.

def train_net(n_epochs):
    valid_loss_min = np.inf  # best validation loss seen so far
    history = {'train_loss': [], 'valid_loss': [], 'epoch': []}

    for epoch in range(n_epochs):  
        train_loss = 0.0
        valid_loss = 0.0  
        net.train()
        running_loss = 0.0
        for batch_i, data in enumerate(train_loader):
            images = data['image']
            key_pts = data['keypoints']
            key_pts = key_pts.view(key_pts.size(0), -1)
            key_pts = key_pts.type(torch.FloatTensor).to(device)
            images = images.type(torch.FloatTensor).to(device)
            output_pts = net(images)
            loss = criterion(output_pts, key_pts)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            train_loss += loss.item()*images.data.size(0)      
        net.eval() 

        with torch.no_grad():
            for batch_i, data in enumerate(test_loader):
                images = data['image']
                key_pts = data['keypoints']
                key_pts = key_pts.view(key_pts.size(0), -1)
                key_pts = key_pts.type(torch.FloatTensor).to(device)
                images = images.type(torch.FloatTensor).to(device)
                output_pts = net(images)
                loss = criterion(output_pts, key_pts)          
                valid_loss += loss.item()*images.data.size(0) 
        train_loss = train_loss/len(train_loader.dataset)
        valid_loss = valid_loss/len(test_loader.dataset) 
        print('Epoch: {} \tTraining Loss: {:.6f} \tValidation Loss: {:.6f}'.format(epoch + 1, train_loss, valid_loss))

        if valid_loss <= valid_loss_min:
            print('Validation loss decreased ({:.6f} --> {:.6f}).  Saving model ...'.format(valid_loss_min,valid_loss))    
            torch.save(net,f'X:\\xxxx\\xxx\\xxx\\epoch{epoch + 1}_loss{valid_loss}.pth')
            valid_loss_min = valid_loss
        history['epoch'].append(epoch + 1)
        history['train_loss'].append(train_loss)
        history['valid_loss'].append(valid_loss)
    print('Finished Training')
    return history

Above is the training code for reference!
    Quick hint: you should provide more specific information about what your data looks like (especially keypoints which are basically the output of your CNN model). Also, please specify the output of your CNN model. – inverted_index Apr 27 '20 at 08:43

2 Answers


This is funny, I was just working on this minutes ago myself! As you probably realise, simply calculating the Euclidean distance between two sets of keypoints doesn't generalise well to cases where you need to compare across body shapes and sizes. So I would recommend using the Object Keypoint Similarity (OKS) score, which measures the distance between body joints normalised by the scale of the person. As described in this blog, OKS is defined as:

OKS = Σᵢ exp(−dᵢ² / (2 s² kᵢ²)) δ(vᵢ > 0) / Σᵢ δ(vᵢ > 0), where dᵢ is the Euclidean distance between detected and ground-truth keypoint i, s is the object scale, kᵢ is a per-keypoint constant, and vᵢ is the ground-truth visibility flag.

Here (line 313, function computeOks) is Facebook Research's implementation:

def computeOks(self, imgId, catId):
    p = self.params
    # dimension here should be Nxm
    gts = self._gts[imgId, catId]
    dts = self._dts[imgId, catId]
    inds = np.argsort([-d['score'] for d in dts], kind='mergesort')
    dts = [dts[i] for i in inds]
    if len(dts) > p.maxDets[-1]:
        dts = dts[0:p.maxDets[-1]]
    # if len(gts) == 0 and len(dts) == 0:
    if len(gts) == 0 or len(dts) == 0:
        return []
    ious = np.zeros((len(dts), len(gts)))
    sigmas = np.array([.26, .25, .25, .35, .35, .79, .79, .72, .72, .62,.62, 1.07, 1.07, .87, .87, .89, .89])/10.0
    vars = (sigmas * 2)**2
    k = len(sigmas)
    # compute oks between each detection and ground truth object
    for j, gt in enumerate(gts):
        # create bounds for ignore regions(double the gt bbox)
        g = np.array(gt['keypoints'])
        xg = g[0::3]; yg = g[1::3]; vg = g[2::3]
        k1 = np.count_nonzero(vg > 0)
        bb = gt['bbox']
        x0 = bb[0] - bb[2]; x1 = bb[0] + bb[2] * 2
        y0 = bb[1] - bb[3]; y1 = bb[1] + bb[3] * 2
        for i, dt in enumerate(dts):
            d = np.array(dt['keypoints'])
            xd = d[0::3]; yd = d[1::3]
            if k1>0:
                # measure the per-keypoint distance if keypoints visible
                dx = xd - xg
                dy = yd - yg
            else:
                # measure minimum distance to keypoints in (x0,y0) & (x1,y1)
                z = np.zeros((k))
                dx = np.max((z, x0-xd), axis=0) + np.max((z, xd-x1), axis=0)
                dy = np.max((z, y0-yd), axis=0) + np.max((z, yd-y1), axis=0)
            e = (dx**2 + dy**2) / vars / (gt['area'] + np.spacing(1)) / 2
            if k1 > 0:
                e=e[vg > 0]
            ious[i, j] = np.sum(np.exp(-e)) / e.shape[0]
    return ious
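
If you just have two sets of keypoints rather than the full COCO annotation structures, a stripped-down sketch of the same computation could look like this (the function name, the (K, 2) array layout and the area argument are my assumptions; the sigmas and the normalisation follow the code above):

import numpy as np

# COCO per-keypoint constants (17 keypoints), taken from the code above
SIGMAS = np.array([.26, .25, .25, .35, .35, .79, .79, .72, .72,
                   .62, .62, 1.07, 1.07, .87, .87, .89, .89]) / 10.0

def oks(gt_kpts, dt_kpts, area, visibility=None, sigmas=SIGMAS):
    # gt_kpts, dt_kpts: arrays of shape (K, 2) holding (x, y) per keypoint
    # area: scale of the ground-truth object (e.g. its segment/bbox area)
    # visibility: optional (K,) array; keypoints with visibility <= 0 are ignored
    gt_kpts = np.asarray(gt_kpts, dtype=float)
    dt_kpts = np.asarray(dt_kpts, dtype=float)
    if visibility is None:
        visibility = np.ones(len(gt_kpts))
    vars_ = (2 * np.asarray(sigmas)) ** 2
    d2 = np.sum((dt_kpts - gt_kpts) ** 2, axis=1)      # squared distance per keypoint
    e = d2 / vars_ / (area + np.spacing(1)) / 2        # normalised error, as in computeOks
    mask = np.asarray(visibility) > 0
    return float(np.exp(-e[mask]).mean()) if mask.any() else 0.0
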
ccl
  • I tried this approach but it seemed difficult in actual application. Could you give a solution based on two sets of keypoints directly? – krishna Jul 05 '21 at 04:17

Perhaps with the Euclidean distance: for a true keypoint (x, y) and a predicted keypoint (x_, y_), the distance is d = sqrt((x_ - x)^2 + (y_ - y)^2). From that you have to get a percentage. If d == 0 you have 100% accuracy for that keypoint. But what is 0%? I would say the distance from the true keypoint to the corner of the image which is farthest away from that keypoint; let's call that distance R. So your accuracy for that point is 1 - d / R. Do that for every keypoint and take the average. I just came up with this, so it might have some flaws, but I think you can work with it and check whether it is the right solution for you.
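
A minimal sketch of that metric, assuming the keypoints come as (K, 2) NumPy arrays in pixel coordinates and the image size is known (the function and argument names are just for illustration):

import numpy as np

def keypoint_accuracy(true_kpts, pred_kpts, img_w, img_h):
    # true_kpts, pred_kpts: arrays of shape (K, 2) with (x, y) coordinates
    true_kpts = np.asarray(true_kpts, dtype=float)
    pred_kpts = np.asarray(pred_kpts, dtype=float)

    # d: Euclidean distance between each true and predicted keypoint
    d = np.linalg.norm(pred_kpts - true_kpts, axis=1)

    # R: distance from each true keypoint to the farthest image corner
    corners = np.array([[0, 0], [img_w, 0], [0, img_h], [img_w, img_h]], dtype=float)
    R = np.linalg.norm(corners[None, :, :] - true_kpts[:, None, :], axis=2).max(axis=1)

    per_kpt = 1.0 - d / R          # 1.0 for a perfect prediction, 0.0 at the far corner
    return float(np.clip(per_kpt, 0.0, 1.0).mean())

You could call this once per sample inside the no_grad() validation loop and average it over the dataset, next to the validation loss.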

Theodor Peifer