
I'm running object detection on a webcam stream or on video files with MXNet/GluonCV, using Python on Ubuntu 18.04. So far, I've been running inference frame by frame, using this code:

import cv2
import mxnet as mx
import gluoncv as gcv


def main():

    ctx = mx.gpu(0)

    # Load pretrained model
    net = gcv.model_zoo.get_model('ssd_512_mobilenet1.0_coco', pretrained=True)
    net.hybridize()

    # Load the webcam handler
    cap = cv2.VideoCapture(0)


    count_frame = 0


    while True:
        print(f"Frame: {count_frame}")

        # Load frame from the camera
        ret, frame = cap.read()
        # print(type(frame))

        if (cv2.waitKey(25) & 0xFF == ord('q')) or (not ret):
            cv2.destroyAllWindows()
            cap.release()
            print("Done!!!")
            break

        # Image pre-processing
        frame = mx.nd.array(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)).astype('uint8')
        frame_nd, frame_np = gcv.data.transforms.presets.ssd.transform_test(frame, short=512, max_size=700)

        # Run frame through network
        frame_nd = frame_nd.as_in_context(ctx)
        class_IDs, scores, bounding_boxes = net(frame_nd)


        # Display result with cv
        img = gcv.utils.viz.cv_plot_bbox(frame_np, bounding_boxes[0], scores[0], class_IDs[0], thresh=0.3, class_names=net.classes)
        gcv.utils.viz.cv_plot_image(img)

        count_frame += 1



    cv2.destroyAllWindows()
    cap.release()



if __name__ == "__main__":
    main()


I wanted to try an alternative version where, instead of performing the detection frame by frame, I run it on batches of frames.
I tried it this way:

  • create an empty list at the beginning;
  • append every frame (after image pre-processing) to the list;
  • after N frames (say, N=50), convert the list to a single mx.nd.array (see the small shape sketch right after this list);
  • feed that array to the model.
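
To make the intended shapes explicit, here is a minimal, self-contained sketch of the stacking I have in mind (the random arrays below are just stand-ins for the per-frame outputs of transform_test, which each have shape (1, 3, H, W); I'm assuming all frames come out with the same spatial size, and mx.nd.concat is only one possible way of joining them):

import mxnet as mx

# Stand-ins for the per-frame outputs of transform_test, each of shape (1, 3, H, W)
frames = [mx.nd.random.uniform(shape=(1, 3, 512, 682)) for _ in range(4)]

# Join them along the batch axis into a single (N, 3, H, W) tensor
batch_nd = mx.nd.concat(*frames, dim=0)
print(batch_nd.shape)  # (4, 3, 512, 682)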

So, I ended up with this code:

def main():

    ctx = mx.gpu(0)

    # Load a pretrained model
    net = gcv.model_zoo.get_model('ssd_512_mobilenet1.0_coco', pretrained=True)
    net.hybridize()

    # Load the webcam handler
    cap = cv2.VideoCapture(0)

    count_frame = 0

    batch = []

    while True:
        print(f"Frame: {count_frame}")

        # Load frame from the camera
        ret, frame = cap.read()

        if (cv2.waitKey(25) & 0xFF == ord('q')) or (not ret):
            cv2.destroyAllWindows()
            cap.release()
            print("Done!!!")
            break

        # Image pre-processing
        frame = mx.nd.array(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)).astype('uint8')
        frame_nd, frame_np = gcv.data.transforms.presets.ssd.transform_test(frame, short=512, max_size=700)
        batch.append(frame_nd)


        if (count_frame > 0) and (count_frame % 50 == 0):
            print("\tStarting detection.")
            batch_nd = mx.nd.array(batch)
            batch_nd = batch_nd.as_in_context(ctx)
            class_IDs, scores, bounding_boxes = net(batch_nd)
            print("\tDetection performed.")
        count_frame += 1



    cv2.destroyAllWindows()
    cap.release()




if __name__ == "__main__":
    main()

The problem is that, when I run it, the execution gets completely stuck when it reaches the line:

batch_nd = mx.nd.array(batch)

For reference, this is the output:

Frame: 0
Frame: 1
Frame: 2
Frame: 3
Frame: 4
Frame: 5
Frame: 6
Frame: 7
Frame: 8
Frame: 9
Frame: 10
Frame: 11
Frame: 12
Frame: 13
Frame: 14
Frame: 15
Frame: 16
Frame: 17
Frame: 18
Frame: 19
Frame: 20
    Starting detection.

Any clue what I'm doing wrong? Is there a better way to send batches of frames to the model?

Carlo
