I am new to the mediapipe library. I was following the code from here. Everything works fine when I don't use the visualize function; that is, with the camera on, I get the detection results below correctly.

I get the following error when I use the visualize function:

2023-07-18 15:11:18.865101: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.
2023-07-18 15:11:19.003048: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.
2023-07-18 15:11:22.230024: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT
INFO: Created TensorFlow Lite XNNPACK delegate for CPU.
detection result: DetectionResult(detections=[Detection(bounding_box=BoundingBox(origin_x=3, origin_y=2, width=633, height=474), categories=[Category(index=None, score=0.7890625, display_name=None, category_name='person')], keypoints=[]), Detection(bounding_box=BoundingBox(origin_x=0, origin_y=200, width=323, height=271), categories=[Category(index=None, score=0.36328125, display_name=None, category_name='couch')], keypoints=[]), Detection(bounding_box=BoundingBox(origin_x=490, origin_y=92, width=146, height=266), categories=[Category(index=None, score=0.1328125, display_name=None, category_name='couch')], keypoints=[]), Detection(bounding_box=BoundingBox(origin_x=1, origin_y=59, width=43, height=85), categories=[Category(index=None, score=0.10546875, display_name=None, category_name='book')], keypoints=[]), Detection(bounding_box=BoundingBox(origin_x=4, origin_y=1, width=232, height=369), categories=[Category(index=None, score=0.08203125, display_name=None, category_name='person')], keypoints=[])])
Traceback (most recent call last):
  File "/home/simon/Public/face_detection/dog/main.py", line 76, in <module>
    annotated_image = visualize(mp_image, DetectionResult)
                      ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/simon/Public/face_detection/dog/main.py", line 23, in visualize
    for detection in detection_result.detections:
                     ^^^^^^^^^^^^^^^^^^^^^^^^^^^
AttributeError: type object 'DetectionResult' has no attribute 'detections'

Here is my code:

import mediapipe as mp
import cv2
import numpy as np
import time 

MARGIN = 10  # pixels
ROW_SIZE = 10  # pixels
FONT_SIZE = 1
FONT_THICKNESS = 1
TEXT_COLOR = (255, 0, 0)  # red

def visualize(
    image,
    detection_result
) -> np.ndarray:
  """Draws bounding boxes on the input image and return it.
  Args:
    image: The input RGB image.
    detection_result: The list of all "Detection" entities to be visualize.
  Returns:
    Image with bounding boxes.
  """
  for detection in detection_result.detections:
    # Draw bounding_box
    bbox = detection.bounding_box
    start_point = bbox.origin_x, bbox.origin_y
    end_point = bbox.origin_x + bbox.width, bbox.origin_y + bbox.height
    cv2.rectangle(image, start_point, end_point, TEXT_COLOR, 3)

    # Draw label and score
    category = detection.categories[0]
    category_name = category.category_name
    probability = round(category.score, 2)
    result_text = category_name + ' (' + str(probability) + ')'
    text_location = (MARGIN + bbox.origin_x,
                     MARGIN + ROW_SIZE + bbox.origin_y)
    cv2.putText(image, result_text, text_location, cv2.FONT_HERSHEY_PLAIN,
                FONT_SIZE, TEXT_COLOR, FONT_THICKNESS)

  return image


model_path = 'efficientdet_lite0.tflite'

BaseOptions = mp.tasks.BaseOptions
DetectionResult = mp.tasks.components.containers.DetectionResult
ObjectDetector = mp.tasks.vision.ObjectDetector
ObjectDetectorOptions = mp.tasks.vision.ObjectDetectorOptions
VisionRunningMode = mp.tasks.vision.RunningMode

def print_result(result: DetectionResult, output_image: mp.Image, timestamp_ms: int):
    print('detection result: {}'.format(result))

options = ObjectDetectorOptions(
    base_options=BaseOptions(model_asset_path=model_path),
    running_mode=VisionRunningMode.LIVE_STREAM,
    max_results=5,
    result_callback=print_result)

with ObjectDetector.create_from_options(options) as detector:
    cap = cv2.VideoCapture(0)

    while True:
        ret, frame = cap.read()

        if not ret:
            break

        mp_image = mp.Image(image_format=mp.ImageFormat.SRGB, data=frame)

        frame_timestamp_ms = int(time.time() * 1000)

        detector.detect_async(mp_image, frame_timestamp_ms)

        annotated_image = visualize(mp_image, DetectionResult)

        cv2.imshow('Object Detection', annotated_image)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    cap.release()
    cv2.destroyAllWindows()

I am also using Python 3.11.3 and opencv-python 4.8.0.74.

I checked the DetectionResult class to see if there was an attribute called detections, and there is one. I am unsure why I am getting this error.
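
For reference, the definition I checked is essentially a dataclass. Here is a simplified sketch of it (with a stand-in Detection; not the full mediapipe source):

import dataclasses
from typing import Any, List

# Simplified stand-in for mediapipe's Detection container (illustration only).
@dataclasses.dataclass
class Detection:
  bounding_box: Any
  categories: List[Any]

# The container in mediapipe's detections.py declares the field like this.
@dataclasses.dataclass
class DetectionResult:
  detections: List[Detection]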

Here is my code without the visualize function, followed by the output I get:

import mediapipe as mp
import cv2
import numpy as np
import time 

MARGIN = 10  # pixels
ROW_SIZE = 10  # pixels
FONT_SIZE = 1
FONT_THICKNESS = 1
TEXT_COLOR = (255, 0, 0)  # red

def visualize(
    image,
    detection_result
) -> np.ndarray:
  """Draws bounding boxes on the input image and return it.
  Args:
    image: The input RGB image.
    detection_result: The list of all "Detection" entities to be visualize.
  Returns:
    Image with bounding boxes.
  """
  for detection in detection_result.detections:
    # Draw bounding_box
    bbox = detection.bounding_box
    start_point = bbox.origin_x, bbox.origin_y
    end_point = bbox.origin_x + bbox.width, bbox.origin_y + bbox.height
    cv2.rectangle(image, start_point, end_point, TEXT_COLOR, 3)

    # Draw label and score
    category = detection.categories[0]
    category_name = category.category_name
    probability = round(category.score, 2)
    result_text = category_name + ' (' + str(probability) + ')'
    text_location = (MARGIN + bbox.origin_x,
                     MARGIN + ROW_SIZE + bbox.origin_y)
    cv2.putText(image, result_text, text_location, cv2.FONT_HERSHEY_PLAIN,
                FONT_SIZE, TEXT_COLOR, FONT_THICKNESS)

  return image


model_path = 'efficientdet_lite0.tflite'

BaseOptions = mp.tasks.BaseOptions
DetectionResult = mp.tasks.components.containers.DetectionResult
ObjectDetector = mp.tasks.vision.ObjectDetector
ObjectDetectorOptions = mp.tasks.vision.ObjectDetectorOptions
VisionRunningMode = mp.tasks.vision.RunningMode

def print_result(result: DetectionResult, output_image: mp.Image, timestamp_ms: int):
    print('detection result: {}'.format(result))

options = ObjectDetectorOptions(
    base_options=BaseOptions(model_asset_path=model_path),
    running_mode=VisionRunningMode.LIVE_STREAM,
    max_results=5,
    result_callback=print_result)

with ObjectDetector.create_from_options(options) as detector:
    cap = cv2.VideoCapture(0)

    while True:
        ret, frame = cap.read()

        if not ret:
            break

        mp_image = mp.Image(image_format=mp.ImageFormat.SRGB, data=frame)

        frame_timestamp_ms = int(time.time() * 1000)

        detector.detect_async(mp_image, frame_timestamp_ms)

        # Removed
        # annotated_image = visualize(mp_image, DetectionResult) 
        # cv2.imshow('Object Detection', annotated_image)

        # Added
        cv2.imshow('Object Detection', frame)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    cap.release()
    cv2.destroyAllWindows()


Output:

(face_detection) [simon@simon dog]$ python main.py
2023-07-18 15:19:26.338527: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.
2023-07-18 15:19:26.474704: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.
2023-07-18 15:19:29.716547: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT
INFO: Created TensorFlow Lite XNNPACK delegate for CPU.
detection result: DetectionResult(detections=[Detection(bounding_box=BoundingBox(origin_x=9, origin_y=2, width=621, height=474), categories=[Category(index=None, score=0.8671875, display_name=None, category_name='person')], keypoints=[]), Detection(bounding_box=BoundingBox(origin_x=0, origin_y=225, width=251, height=247), categories=[Category(index=None, score=0.26171875, display_name=None, category_name='person')], keypoints=[]), Detection(bounding_box=BoundingBox(origin_x=485, origin_y=135, width=149, height=256), categories=[Category(index=None, score=0.23046875, display_name=None, category_name='couch')], keypoints=[]), Detection(bounding_box=BoundingBox(origin_x=0, origin_y=72, width=41, height=76), categories=[Category(index=None, score=0.14453125, display_name=None, category_name='book')], keypoints=[]), Detection(bounding_box=BoundingBox(origin_x=423, origin_y=287, width=35, height=51), categories=[Category(index=None, score=0.09765625, display_name=None, category_name='remote')], keypoints=[])])
detection result: DetectionResult(detections=[Detection(bounding_box=BoundingBox(origin_x=9, origin_y=2, width=621, height=474), categories=[Category(index=None, score=0.81640625, display_name=None, category_name='person')], keypoints=[]), Detection(bounding_box=BoundingBox(origin_x=491, origin_y=140, width=144, height=256), categories=[Category(index=None, score=0.3203125, display_name=None, category_name='couch')], keypoints=[]), Detection(bounding_box=BoundingBox(origin_x=0, origin_y=183, width=251, height=293), categories=[Category(index=None, score=0.24609375, display_name=None, category_name='person')], keypoints=[]), Detection(bounding_box=BoundingBox(origin_x=1, origin_y=284, width=92, height=191), categories=[Category(index=None, score=0.12109375, display_name=None, category_name='couch')], keypoints=[]), Detection(bounding_box=BoundingBox(origin_x=0, origin_y=74, width=41, height=74), categories=[Category(index=None, score=0.11328125, display_name=None, category_name='book')], keypoints=[])])
detection result: DetectionResult(detections=[Detection(bounding_box=BoundingBox(origin_x=3, origin_y=0, width=633, height=480), categories=[Category(index=None, score=0.85546875, display_name=None, category_name='person')], keypoints=[]), Detection(bounding_box=BoundingBox(origin_x=21, origin_y=218, width=355, height=256), categories=[Category(index=None, score=0.19921875, display_name=None, category_name='person')], keypoints=[]), Detection(bounding_box=BoundingBox(origin_x=512, origin_y=139, width=125, height=199), categories=[Category(index=None, score=0.1328125, display_name=None, category_name='couch')], keypoints=[]), Detection(bounding_box=BoundingBox(origin_x=0, origin_y=2, width=219, height=430), categories=[Category(index=None, score=0.1328125, display_name=None, category_name='person')], keypoints=[]), Detection(bounding_box=BoundingBox(origin_x=423, origin_y=289, width=37, height=51), categories=[Category(index=None, score=0.12109375, display_name=None, category_name='cell phone')], keypoints=[])])
detection result: DetectionResult(detections=[Detection(bounding_box=BoundingBox(origin_x=3, origin_y=0, width=633, height=480), categories=[Category(index=None, score=0.80078125, display_name=None, category_name='person')], keypoints=[]), Detection(bounding_box=BoundingBox(origin_x=490, origin_y=138, width=146, height=251), categories=[Category(index=None, score=0.24609375, display_name=None, category_name='couch')], keypoints=[]), Detection(bounding_box=BoundingBox(origin_x=0, origin_y=203, width=261, height=261), categories=[Category(index=None, score=0.19921875, display_name=None, category_name='dog')], keypoints=[]), Detection(bounding_box=BoundingBox(origin_x=0, origin_y=72, width=41, height=76), categories=[Category(index=None, score=0.15625, display_name=None, category_name='book')], keypoints=[]), Detection(bounding_box=BoundingBox(origin_x=423, origin_y=286, width=37, height=52), categories=[Category(index=None, score=0.07421875, display_name=None, category_name='remote')], keypoints=[])])
detection result: DetectionResult(detections=[Detection(bounding_box=BoundingBox(origin_x=3, origin_y=2, width=633, height=474), categories=[Category(index=None, score=0.84375, display_name=None, category_name='person')], keypoints=[]), Detection(bounding_box=BoundingBox(origin_x=0, origin_y=198, width=251, height=252), categories=[Category(index=None, score=0.2109375, display_name=None, category_name='person')], keypoints=[]), Detection(bounding_box=BoundingBox(origin_x=484, origin_y=130, width=152, height=246), categories=[Category(index=None, score=0.16796875, display_name=None, category_name='chair')], keypoints=[]), Detection(bounding_box=BoundingBox(origin_x=0, origin_y=71, width=42, height=77), categories=[Category(index=None, score=0.15625, display_name=None, category_name='book')], keypoints=[]), Detection(bounding_box=BoundingBox(origin_x=58, origin_y=311, width=564, height=155), categories=[Category(index=None, score=0.10546875, display_name=None, category_name='person')], keypoints=[])])
detection result: DetectionResult(detections=[Detection(bounding_box=BoundingBox(origin_x=3, origin_y=2, width=633, height=474), categories=[Category(index=None, score=0.76953125, display_name=None, category_name='person')], keypoints=[]), Detection(bounding_box=BoundingBox(origin_x=481, origin_y=138, width=158, height=299), categories=[Category(index=None, score=0.19921875, display_name=None, category_name='couch')], keypoints=[]), Detection(bounding_box=BoundingBox(origin_x=14, origin_y=225, width=256, height=242), categories=[Category(index=None, score=0.18359375, display_name=None, category_name='person')], keypoints=[]), Detection(bounding_box=BoundingBox(origin_x=0, origin_y=73, width=41, height=74), categories=[Category(index=None, score=0.15625, display_name=None, category_name='book')], keypoints=[]), Detection(bounding_box=BoundingBox(origin_x=2, origin_y=284, width=94, height=191), categories=[Category(index=None, score=0.08203125, display_name=None, category_name='person')], keypoints=[])])
detection result: DetectionResult(detections=[Detection(bounding_box=BoundingBox(origin_x=3, origin_y=2, width=633, height=474), categories=[Category(index=None, score=0.81640625, display_name=None, category_name='person')], keypoints=[]), Detection(bounding_box=BoundingBox(origin_x=0, origin_y=225, width=251, height=242), categories=[Category(index=None, score=0.26171875, display_name=None, category_name='person')], keypoints=[]), Detection(bounding_box=BoundingBox(origin_x=0, origin_y=69, width=42, height=82), categories=[Category(index=None, score=0.14453125, display_name=None, category_name='book')], keypoints=[]), Detection(bounding_box=BoundingBox(origin_x=477, origin_y=108, width=158, height=335), categories=[Category(index=None, score=0.1328125, display_name=None, category_name='couch')], keypoints=[]), Detection(bounding_box=BoundingBox(origin_x=26, origin_y=257, width=223, height=72), categories=[Category(index=None, score=0.09765625, display_name=None, category_name='person')], keypoints=[])])
detection result: DetectionResult(detections=[Detection(bounding_box=BoundingBox(origin_x=3, origin_y=2, width=633, height=474), categories=[Category(index=None, score=0.84375, display_name=None, category_name='person')], keypoints=[]), Detection(bounding_box=BoundingBox(origin_x=14, origin_y=223, width=355, height=247), categories=[Category(index=None, score=0.19921875, display_name=None, category_name='person')], keypoints=[]), Detection(bounding_box=BoundingBox(origin_x=472, origin_y=120, width=168, height=323), categories=[Category(index=None, score=0.18359375, display_name=None, category_name='couch')], keypoints=[]), Detection(bounding_box=BoundingBox(origin_x=0, origin_y=70, width=42, height=77), categories=[Category(index=None, score=0.15625, display_name=None, category_name='book')], keypoints=[]), Detection(bounding_box=BoundingBox(origin_x=2, origin_y=310, width=96, height=164), categories=[Category(index=None, score=0.08203125, display_name=None, category_name='couch')], keypoints=[])])
detection result: DetectionResult(detections=[Detection(bounding_box=BoundingBox(origin_x=3, origin_y=0, width=633, height=480), categories=[Category(index=None, score=0.8671875, display_name=None, category_name='person')], keypoints=[]), Detection(bounding_box=BoundingBox(origin_x=0, origin_y=177, width=251, height=293), categories=[Category(index=None, score=0.2109375, display_name=None, category_name='person')], keypoints=[]), Detection(bounding_box=BoundingBox(origin_x=0, origin_y=69, width=42, height=85), categories=[Category(index=None, score=0.14453125, display_name=None, category_name='book')], keypoints=[]), Detection(bounding_box=BoundingBox(origin_x=523, origin_y=136, width=116, height=207), categories=[Category(index=None, score=0.11328125, display_name=None, category_name='couch')], keypoints=[]), Detection(bounding_box=BoundingBox(origin_x=30, origin_y=313, width=552, height=164), categories=[Category(index=None, score=0.08984375, display_name=None, category_name='couch')], keypoints=[])])
detection result: DetectionResult(detections=[Detection(bounding_box=BoundingBox(origin_x=3, origin_y=2, width=633, height=474), categories=[Category(index=None, score=0.76953125, display_name=None, category_name='person')], keypoints=[]), Detection(bounding_box=BoundingBox(origin_x=473, origin_y=136, width=164, height=305), categories=[Category(index=None, score=0.24609375, display_name=None, category_name='couch')], keypoints=[]), Detection(bounding_box=BoundingBox(origin_x=0, origin_y=208, width=260, height=256), categories=[Category(index=None, score=0.16796875, display_name=None, category_name='person')], keypoints=[]), Detection(bounding_box=BoundingBox(origin_x=0, origin_y=68, width=42, height=80), categories=[Category(index=None, score=0.1328125, display_name=None, category_name='book')], keypoints=[]), Detection(bounding_box=BoundingBox(origin_x=0, origin_y=387, width=77, height=92), categories=[Category(index=None, score=0.06640625, display_name=None, category_name='bed')], keypoints=[])])
(face_detection) [simon@simon dog]$

1 Answer

I checked through the mediapipe solutions docs but could not find any mp.tasks.components.containers.DetectionResult.

Instead, I could only find mp.tasks.components.containers.LandmarksDetectionResult and mp.tasks.components.containers.detection, so are you sure you are referencing the right class? Or can you give the link where you found this mp.tasks.components.containers.DetectionResult?
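
That said, the traceback points at the call site rather than the class: visualize(mp_image, DetectionResult) passes the DetectionResult class object itself, not a result instance, which is exactly what "type object 'DetectionResult' has no attribute 'detections'" means. In LIVE_STREAM mode the instance is only delivered to your result_callback, so one possible rewiring is the sketch below. save_result and latest_result are names I made up, and it reuses your visualize function:

import time

import cv2
import mediapipe as mp

# Reuses the visualize() helper from the question above.
latest_result = None  # made-up name: holds the newest DetectionResult instance

def save_result(result, output_image, timestamp_ms):
    # LIVE_STREAM delivers each DetectionResult instance here, asynchronously.
    global latest_result
    latest_result = result

options = mp.tasks.vision.ObjectDetectorOptions(
    base_options=mp.tasks.BaseOptions(model_asset_path='efficientdet_lite0.tflite'),
    running_mode=mp.tasks.vision.RunningMode.LIVE_STREAM,
    max_results=5,
    result_callback=save_result)

with mp.tasks.vision.ObjectDetector.create_from_options(options) as detector:
    cap = cv2.VideoCapture(0)
    while True:
        ret, frame = cap.read()
        if not ret:
            break

        mp_image = mp.Image(image_format=mp.ImageFormat.SRGB, data=frame)
        detector.detect_async(mp_image, int(time.time() * 1000))

        # Draw with the most recent result *instance* once the first
        # callback has fired; until then, show the raw frame.
        if latest_result is not None:
            frame = visualize(frame, latest_result)
        cv2.imshow('Object Detection', frame)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    cap.release()
    cv2.destroyAllWindows()

Note that this draws on frame (a numpy array) rather than on mp_image, since cv2 drawing functions expect a numpy array; the official sample copies one out of the mp.Image with np.copy(mp_image.numpy_view()). Because results arrive asynchronously, the overlay can lag the video by a frame or so.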

Aniket
  • Yeah, I looked in the [mediapipe solutions docs](https://developers.google.com/mediapipe/api/solutions/python/mp/tasks/components/containers/Detection), and you are correct; it does not mention the `DetectionResult` class. But I found it here: [detections.py](https://github.com/google/mediapipe/blob/master/mediapipe/tasks/python/components/containers/detections.py#L130-L142) – Simon Bakken-Jantasuk Jul 22 '23 at 10:03