Yolov7 on Jetson Orin - Detection time

Question

I'm using a Jetson Orin developer kit with the following code based on Yolov7.

The detection time reaches up to 1.6 s on a custom dataset, using GPU with CUDA enabled.

The OS is Ubuntu 20.04, the opencv version is 4.7.0, the python version is 3.8, the CUDA version is 11.4 and cuDNN 8.6.

When using a video file the detection time is reduced, but when I'm using a machine vision camera the detection time is significantly increased.

Any suggestions on how to reduce the detection time?

import logging
from datetime import datetime
from pathlib import Path
import random
from typing import List

import cv2
import numpy as np

from deep_sort.deep_sort import Deep
from enums.camera.RunningCameraStatus import RunningCameraStatus
from models.CameraRunningContext import CameraRunningContext
from models.measurements.CameraSingleMeasurementValue import CameraSingleMeasurementValue
from object_detection import ObjectDetection
from services.detectors.CameraDetectorService import CameraDetectorService


class Yolo7CameraDetectorService(CameraDetectorService):

    def __init__(self):
        super().__init__()

        #Define a dictionary that
        # Key : integer
        # Value : timestamp of last updated(exit camera timestamp)
        self.__detected_items = dict()

    """
    Detect items using yolo7

    Store items that remains in the current frame in the detected items dictionary
    Remove all the other items and write them to camera measurements buffer
    """

    def detect(self, camera: CameraRunningContext):

        #Call abstract method
        super().detect(camera)

        # Counting area ROI
        yellow_poly_pts = np.array([[0, 150], [1440, 150], [1440, 850], [0, 850]])

        #Define codec for video
        fourcc = cv2.VideoWriter_fourcc(*'XVID')

        # Load Object Detection

        # ATTENTION
        # Object detection applies toLower() on path
        path_waights = Path(camera.camera.detection_weights_path)

        od = ObjectDetection(path_waights)
        od.load_detection_model(image_size=416, confThreshold=0.5, nmsThreshold=0.4)
        od.load_class_names(camera.camera.detection_classes_names)

        # Load Object Tracking Deep Sort
        deep = Deep(max_distance=0.8,
                    nms_max_overlap=0.3,
                    n_init=3,
                    max_age=5,
                    max_iou_distance=0.7)
        
        tracker = deep.sort_tracker()

        # While running camera status is enabled and streaming and detecting
        while camera.status is RunningCameraStatus.ENABLED_AND_STREAMING_AND_DETECTING :

            now = datetime.utcnow()

            # Get current raw image frame
            image = camera.get_cur_raw_image()

            # Convert to RGB image
            rgb = image.convert("RGB")

            # Convert np array
            array = rgb.get_numpy_array()
            #out = cv2.VideoWriter('/home/christina/Desktop/yv7run/output.avi', fourcc, 20.0,(1080, 1080))

            # opencv loads the format bgr so the frame needs to be converted to rgb to process
            array = cv2.cvtColor(array, cv2.COLOR_BGR2RGB)

            np.save("/home/christina/Desktop/test.np",array)

            logging.info("Get frame and convert to cvtcolor in mseconds=" + str( (datetime.utcnow() - now).total_seconds() * 1000 ))

            """ 1. Object Detection """
            (class_ids, scores, boxes) = od.detect(array)
            logging.info(
                "od detect only in mseconds=" + str((datetime.utcnow() - now).total_seconds() * 1000))

            """ 2. Object Tracking """
            features = deep.encoder(array, boxes)
            detections = deep.Detection(boxes, scores, class_ids, features)

            logging.info(
                "tracking only in mseconds=" + str((datetime.utcnow() - now).total_seconds() * 1000))

            # Draw yellow polygon for the ROI
            cv2.polylines(array, [yellow_poly_pts], True, (0, 255, 255), 2)

            tracker.predict()
            (class_ids, object_ids, boxes) = tracker.update(detections)

            logging.info("Object dtection and tracker in mseconds=" + str((datetime.utcnow() - now).total_seconds() * 1000))

            # Iterate tracker results
            for class_id, object_id, box in zip(class_ids, object_ids, boxes):

                (x, y, x2, y2) = box
                class_name = od.names[class_id]
                color = od.colors[class_id]

                # Center doubleroll detected
                cx = int((x + x2) / 2)
                cy = int((y + y2) / 2)

                cv2.rectangle(array, (x, y), (x2, y2), (0, 0, 255), 2)
                cv2.rectangle(array, (x, y), (x + len(class_name) * 20, y - 30), (0, 0, 255), -1)
                cv2.putText(array, class_name + " " + str(object_id), (x, y - 10), 0, 0.75, (255, 255, 255), 2)
                cv2.circle(array, (cx, cy), 4, (0, 0, 255), -1)

                """3. Object Counting"""

                # Check if object is in the ROI
                result = cv2.pointPolygonTest(yellow_poly_pts, (int(cx), int(cy)), False)
                #print(result, object_id, datetime.utcnow())



                if result > 0:
                    # Store item to dictionary as camera single measurement value
                    self.__detected_items[object_id] = CameraSingleMeasurementValue(value=1, timestamp= datetime.now())
                    print("FOUND ONE!!!", result)
                    #cv2.imwrite("/home/christina/Desktop/yv7run/Img005.png", array)
                    print("Done with image")

                #out.write(array)

                # Show text with counter
            #od_count = len(obs_counted)
            #cv2.putText(array, "Number of Objects {}".format(od_count), (20, 20), 0, 1, (0, 255, 0), 2)

            #imS = cv2.resize(array, (960, 540))
            #cv2.imshow("Frame", imS)
            #cv2.waitKey(0)
                #out.release()


            #Outside iteration
            #Remove all items that does not exist in the current object ids list
            # The current frame detected items must be added before to the dictionary
            camera_measurement_values = self.__remove_and_get_all_except(object_ids)

            #Add values to buffers
            self.camera_buffered_measurements_service.add_multi(camera.camera.crguid, camera_measurement_values)

            logging.info(
                "Total mseconds=" + str((datetime.utcnow() - now).total_seconds() * 1000))


    """
    Remove and retutn all items ids from 
    """
    def __remove_and_get_all_except(self, object_ids) -> List[CameraSingleMeasurementValue]:
        
        # Get all dictionary keys that does not exist in the current frame detected objects
        keys = [key for key in self.__detected_items.keys() if key not in object_ids]

        # Get values for the above keys
        values = [self.__detected_items.get(key) for key in keys]

        # Remove above keys form dictionary
        for key in keys: del self.__detected_items[key]

        # Dictionary contains only detected items that are not exited camera and are still trackable
        # Return items that exits camera context
        return values

Nvidia recommend to use on Jetson or DeepStream or directly TensorRT. PyTorch will be slow — Nuzhny, Jun 21 '23 at 11:29

Yolov7 on Jetson Orin - Detection time

0 Answers0