I am trying to implement object detection using YOLOv3 and PyTorch. I am training the model on my custom dataset, which contains 200 images of a single type, each with exactly one labelled object (for reference, check the image below). I have annotated the images and saved the labels in .txt format.
Dataset/
    images/
        img1.jpg
        img2.jpg
    labels/
        img1.txt
        img2.txt
    test.txt
    valid.txt
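Each label file uses the darknet annotation convention that this repo's ListDataset loader expects: one line per object, with the class index followed by the box centre, width, and height, all normalized to [0, 1]. Since every image contains exactly one object of class 0, each of my .txt files holds a single line like this (the numbers here are just illustrative):

0 0.512 0.487 0.240 0.310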
My model training part is working fine. I save weights after every epoch (checkpoints). I run the training for 20 epochs and take the last saved checkpoint weights for the object detection part. There is only one class (IRA), indexed 0. My yolov3.cfg file is also correct: classes=1 and filters=18, updated in all three places in the file.
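For reference, this is roughly what each of the three updated spots in my yolov3.cfg looks like (filters = (classes + 5) * 3 = 18 for one class; only the mask line differs between the three [yolo] blocks):

[convolutional]
size=1
stride=1
pad=1
filters=18
activation=linear

[yolo]
mask = 6,7,8
anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326
classes=1
num=9
jitter=.3
ignore_thresh = .7
truth_thresh = 1
random=1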
But the problem is: on running the object detection part of the program, I am getting this error. The problem is specific to this dataset only. I tested on other image datasets and it worked fine, but I don't know why it is not producing any label in this case.
Here is my model training part:
from __future__ import division
from models import *
from utils import *
from datasets import *
from parse_config import *
import os
import sys
import time
import datetime
import argparse
import torch
from torch.utils.data import DataLoader
from torchvision import datasets
from torchvision import transforms
from torch.autograd import Variable
import torch.optim as optim
parser = argparse.ArgumentParser()
parser.add_argument("-f")
parser.add_argument("--epochs", type=int, default=20, help="number of epochs")
parser.add_argument("--image_folder", type=str, default="/content/drive/My Drive/FINAL_PYTORCH_DATA/images", help="path to dataset")
parser.add_argument("--batch_size", type=int, default=16, help="size of each image batch")
parser.add_argument("--model_config_path", type=str, default="/content/drive/My Drive/config/yolov3.cfg", help="path to model config file")
parser.add_argument("--data_config_path", type=str, default="/content/drive/My Drive/config/coco.data", help="path to data config file")
parser.add_argument("--weights_path", type=str, default="/content/drive/My Drive/config/yolov3.weights", help="path to weights file")
parser.add_argument("--class_path", type=str, default="/content/drive/My Drive/config/coco.names", help="path to class label file")
parser.add_argument("--conf_thres", type=float, default=0.8, help="object confidence threshold")
parser.add_argument("--nms_thres", type=float, default=0.4, help="iou thresshold for non-maximum suppression")
parser.add_argument("--n_cpu", type=int, default=0, help="number of cpu threads to use during batch generation")
parser.add_argument("--img_size", type=int, default=416, help="size of each image dimension")
parser.add_argument("--checkpoint_interval", type=int, default=1, help="interval between saving model weights")
parser.add_argument("--checkpoint_dir", type=str, default="/content/checkpoints", help="directory where model checkpoints are saved")
parser.add_argument("--use_cuda", type=bool, default=True, help="whether to use cuda if available")
opt = parser.parse_args()
print(opt)
cuda = torch.cuda.is_available() and opt.use_cuda
os.makedirs(opt.checkpoint_dir, exist_ok=True)  # make sure the checkpoint directory exists
classes = load_classes(opt.class_path)
# Get data configuration
data_config = parse_data_config(opt.data_config_path)
train_path = data_config["train"]
# Get hyper parameters
hyperparams = parse_model_config(opt.model_config_path)[0]
learning_rate = float(hyperparams["learning_rate"])
momentum = float(hyperparams["momentum"])
decay = float(hyperparams["decay"])
burn_in = int(hyperparams["burn_in"])
# Initiate model
model = Darknet(opt.model_config_path)
model.load_weights(opt.weights_path)
#model.apply(weights_init_normal)
if cuda:
    model = model.cuda()
model.train()
# Get dataloader
dataloader = torch.utils.data.DataLoader(
    ListDataset(train_path), batch_size=opt.batch_size, shuffle=False, num_workers=opt.n_cpu
)
Tensor = torch.cuda.FloatTensor if cuda else torch.FloatTensor
optimizer = torch.optim.Adam(filter(lambda p: p.requires_grad, model.parameters()))
for epoch in range(opt.epochs):
    for batch_i, (_, imgs, targets) in enumerate(dataloader):
        imgs = Variable(imgs.type(Tensor))
        targets = Variable(targets.type(Tensor), requires_grad=False)

        optimizer.zero_grad()
        loss = model(imgs, targets)  # forward pass returns the total loss when targets are passed
        loss.backward()
        optimizer.step()

        print(
            "[Epoch %d/%d, Batch %d/%d] [Losses: x %f, y %f, w %f, h %f, conf %f, cls %f, total %f, recall: %.5f, precision: %.5f]"
            % (
                epoch,
                opt.epochs,
                batch_i,
                len(dataloader),
                model.losses["x"],
                model.losses["y"],
                model.losses["w"],
                model.losses["h"],
                model.losses["conf"],
                model.losses["cls"],
                loss.item(),
                model.losses["recall"],
                model.losses["precision"],
            )
        )

        model.seen += imgs.size(0)

    if epoch % opt.checkpoint_interval == 0:
        model.save_weights("%s%d.weights" % (opt.checkpoint_dir, epoch))
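Note that because --checkpoint_dir has no trailing slash, the "%s%d.weights" pattern produces a flat filename rather than a file inside a directory, which is exactly the path I load below:

>>> "%s%d.weights" % ("/content/checkpoints", 19)
'/content/checkpoints19.weights'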
Here is my model loading part:
import random

import numpy as np
import matplotlib.pyplot as plt
import matplotlib.patches as patches
from PIL import Image

import utils  # this repo's utils module, providing load_classes and non_max_suppression
# (Darknet, torch, transforms, and Variable are already in scope from the training cell above)

config_path = '/content/drive/My Drive/config/yolov3.cfg'
weights_path = '/content/checkpoints19.weights'
class_path = '/content/drive/My Drive/config/coco.names'
img_size = 416
conf_thres = 0.8
nms_thres = 0.4

# Load model and weights
model = Darknet(config_path, img_size=img_size)
model.load_weights(weights_path)
model.cuda()
model.eval()
classes = utils.load_classes(class_path)
Tensor = torch.cuda.FloatTensor
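As a quick sanity check after loading (a minimal sketch, assuming parse_model_config from this repo's parse_config.py behaves as in the training script above), I can verify that the class count in the cfg matches the names file:

from parse_config import parse_model_config

# Hypothetical sanity check: the cfg's [yolo] blocks and the names file
# should both agree on a single class.
module_defs = parse_model_config(config_path)
yolo_classes = [int(m["classes"]) for m in module_defs if m["type"] == "yolo"]
print("classes per [yolo] block:", yolo_classes)  # expect [1, 1, 1]
print("names loaded:", len(classes))              # expect 1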
Here is my object detection part:
def detect_image(img):
    # scale so the long side is img_size, then pad the short side to make a square
    ratio = min(img_size / img.size[0], img_size / img.size[1])
    imw = round(img.size[0] * ratio)
    imh = round(img.size[1] * ratio)
    img_transforms = transforms.Compose([
        transforms.Resize((imh, imw)),
        transforms.Pad((max(int((imh - imw) / 2), 0), max(int((imw - imh) / 2), 0),
                        max(int((imh - imw) / 2), 0), max(int((imw - imh) / 2), 0)),
                       (128, 128, 128)),
        transforms.ToTensor(),
    ])
    # convert image to Tensor
    image_tensor = img_transforms(img).float()
    image_tensor = image_tensor.unsqueeze_(0)
    input_img = Variable(image_tensor.type(Tensor))
    # run inference on the model and get detections
    with torch.no_grad():
        detections = model(input_img)
        # second argument is the number of classes passed to NMS
        detections = utils.non_max_suppression(detections, 80, conf_thres, nms_thres)
    return detections[0]
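# To make the scaling concrete with a hypothetical 640x480 input:
#   ratio = min(416/640, 416/480) = 0.65, so imw = 416 and imh = 312,
#   and Pad adds (416 - 312) / 2 = 52 grey pixels on the top and bottom,
#   producing a square 416x416 tensor.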
# load image and get detections
img_path = "/content/image1 - Copy (2).jpg"
prev_time = time.time()
img = Image.open(img_path)
detections = detect_image(img)
inference_time = datetime.timedelta(seconds=time.time() - prev_time)
print ('Inference Time: %s' % (inference_time))
# Get bounding-box colors
cmap = plt.get_cmap('tab20b')
colors = [cmap(i) for i in np.linspace(0, 1, 20)]
img = np.array(img)
plt.figure()
fig, ax = plt.subplots(1, figsize=(12,9))
ax.imshow(img)
pad_x = max(img.shape[0] - img.shape[1], 0) * (img_size / max(img.shape))
pad_y = max(img.shape[1] - img.shape[0], 0) * (img_size / max(img.shape))
unpad_h = img_size - pad_y
unpad_w = img_size - pad_x
k = []  # will hold the predicted class name
if detections is not None:
    unique_labels = detections[:, -1].cpu().unique()
    n_cls_preds = len(unique_labels)
    bbox_colors = random.sample(colors, n_cls_preds)
    # browse detections and draw bounding boxes
    for x1, y1, x2, y2, conf, cls_conf, cls_pred in detections:
        # rescale box coordinates from the padded square input back to the original image
        box_h = ((y2 - y1) / unpad_h) * img.shape[0]
        box_w = ((x2 - x1) / unpad_w) * img.shape[1]
        y1 = ((y1 - pad_y // 2) / unpad_h) * img.shape[0]
        x1 = ((x1 - pad_x // 2) / unpad_w) * img.shape[1]
        color = bbox_colors[int(np.where(unique_labels == int(cls_pred))[0])]
        bbox = patches.Rectangle((x1, y1), box_w, box_h, linewidth=2, edgecolor=color, facecolor='none')
        ax.add_patch(bbox)
        k = classes[int(cls_pred)]
        plt.text(x1, y1, s=classes[int(cls_pred)], color='white', verticalalignment='top',
                 bbox={'color': color, 'pad': 0})
plt.axis('off')
# save image
plt.savefig(img_path.replace(".jpg", "-det.jpg"), bbox_inches='tight', pad_inches=0.0)
plt.show()
print(k)
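For what it's worth, here is a minimal sketch of a check I am running to see whether any raw objectness score clears the 0.8 confidence threshold before NMS (it repeats the preprocessing from detect_image; index 4 being the objectness score is an assumption based on this repo's output layout):

# Hypothetical debugging snippet: inspect raw predictions before NMS.
img = Image.open(img_path)
ratio = min(img_size / img.size[0], img_size / img.size[1])
imw, imh = round(img.size[0] * ratio), round(img.size[1] * ratio)
tfm = transforms.Compose([
    transforms.Resize((imh, imw)),
    transforms.Pad((max(int((imh - imw) / 2), 0), max(int((imw - imh) / 2), 0),
                    max(int((imh - imw) / 2), 0), max(int((imw - imh) / 2), 0)),
                   (128, 128, 128)),
    transforms.ToTensor(),
])
with torch.no_grad():
    raw = model(tfm(img).unsqueeze(0).type(Tensor))
print("max objectness score:", raw[0, :, 4].max().item())  # if this stays below 0.8, NMS keeps nothing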
Can anyone help me understand what causes this error and how to solve it?