I am new to the mediapipe library and was following the code from here. Everything works fine as long as I don't call the visualize function: with the camera on, I get the detection results correctly. As soon as I call the visualize function, I get this error:
2023-07-18 15:11:18.865101: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.
2023-07-18 15:11:19.003048: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.
2023-07-18 15:11:22.230024: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT
INFO: Created TensorFlow Lite XNNPACK delegate for CPU.
detection result: DetectionResult(detections=[Detection(bounding_box=BoundingBox(origin_x=3, origin_y=2, width=633, height=474), categories=[Category(index=None, score=0.7890625, display_name=None, category_name='person')], keypoints=[]), Detection(bounding_box=BoundingBox(origin_x=0, origin_y=200, width=323, height=271), categories=[Category(index=None, score=0.36328125, display_name=None, category_name='couch')], keypoints=[]), Detection(bounding_box=BoundingBox(origin_x=490, origin_y=92, width=146, height=266), categories=[Category(index=None, score=0.1328125, display_name=None, category_name='couch')], keypoints=[]), Detection(bounding_box=BoundingBox(origin_x=1, origin_y=59, width=43, height=85), categories=[Category(index=None, score=0.10546875, display_name=None, category_name='book')], keypoints=[]), Detection(bounding_box=BoundingBox(origin_x=4, origin_y=1, width=232, height=369), categories=[Category(index=None, score=0.08203125, display_name=None, category_name='person')], keypoints=[])])
Traceback (most recent call last):
  File "/home/simon/Public/face_detection/dog/main.py", line 76, in <module>
    annotated_image = visualize(mp_image, DetectionResult)
                      ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/simon/Public/face_detection/dog/main.py", line 23, in visualize
    for detection in detection_result.detections:
                     ^^^^^^^^^^^^^^^^^^^^^^^^^^^
AttributeError: type object 'DetectionResult' has no attribute 'detections'
Here is my code:
import mediapipe as mp
import cv2
import numpy as np
import time

MARGIN = 10  # pixels
ROW_SIZE = 10  # pixels
FONT_SIZE = 1
FONT_THICKNESS = 1
TEXT_COLOR = (255, 0, 0)  # red

def visualize(
    image,
    detection_result
) -> np.ndarray:
    """Draws bounding boxes on the input image and returns it.
    Args:
        image: The input RGB image.
        detection_result: The list of all "Detection" entities to be visualized.
    Returns:
        Image with bounding boxes.
    """
    for detection in detection_result.detections:
        # Draw bounding_box
        bbox = detection.bounding_box
        start_point = bbox.origin_x, bbox.origin_y
        end_point = bbox.origin_x + bbox.width, bbox.origin_y + bbox.height
        cv2.rectangle(image, start_point, end_point, TEXT_COLOR, 3)

        # Draw label and score
        category = detection.categories[0]
        category_name = category.category_name
        probability = round(category.score, 2)
        result_text = category_name + ' (' + str(probability) + ')'
        text_location = (MARGIN + bbox.origin_x,
                         MARGIN + ROW_SIZE + bbox.origin_y)
        cv2.putText(image, result_text, text_location, cv2.FONT_HERSHEY_PLAIN,
                    FONT_SIZE, TEXT_COLOR, FONT_THICKNESS)
    return image

model_path = 'efficientdet_lite0.tflite'

BaseOptions = mp.tasks.BaseOptions
DetectionResult = mp.tasks.components.containers.DetectionResult
ObjectDetector = mp.tasks.vision.ObjectDetector
ObjectDetectorOptions = mp.tasks.vision.ObjectDetectorOptions
VisionRunningMode = mp.tasks.vision.RunningMode

def print_result(result: DetectionResult, output_image: mp.Image, timestamp_ms: int):
    print('detection result: {}'.format(result))

options = ObjectDetectorOptions(
    base_options=BaseOptions(model_asset_path=model_path),
    running_mode=VisionRunningMode.LIVE_STREAM,
    max_results=5,
    result_callback=print_result)

with ObjectDetector.create_from_options(options) as detector:
    cap = cv2.VideoCapture(0)
    while True:
        ret, frame = cap.read()
        if not ret:
            break
        mp_image = mp.Image(image_format=mp.ImageFormat.SRGB,
                            data=frame)
        frame_timestamp_ms = int(time.time() * 1000)
        detector.detect_async(mp_image, frame_timestamp_ms)
        annotated_image = visualize(mp_image, DetectionResult)
        cv2.imshow('Object Detection', annotated_image)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
    cap.release()
    cv2.destroyAllWindows()
I am also using Python 3.11.3 and opencv-python 4.8.0.74. I checked the DetectionResult class to see if there is an attribute called detections, and there is one, so I am unsure why I am getting this error.
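For reference, a minimal sketch of that check (it assumes DetectionResult is a dataclass, which matches how mediapipe defines it):

import dataclasses
import mediapipe as mp

DetectionResult = mp.tasks.components.containers.DetectionResult

# DetectionResult is declared as a dataclass, so its fields can be
# listed directly; 'detections' does appear among them.
print([f.name for f in dataclasses.fields(DetectionResult)])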
Here is my code without using the visualize function, followed by the output I get:
import mediapipe as mp
import cv2
import numpy as np
import time

MARGIN = 10  # pixels
ROW_SIZE = 10  # pixels
FONT_SIZE = 1
FONT_THICKNESS = 1
TEXT_COLOR = (255, 0, 0)  # red

def visualize(
    image,
    detection_result
) -> np.ndarray:
    """Draws bounding boxes on the input image and returns it.
    Args:
        image: The input RGB image.
        detection_result: The list of all "Detection" entities to be visualized.
    Returns:
        Image with bounding boxes.
    """
    for detection in detection_result.detections:
        # Draw bounding_box
        bbox = detection.bounding_box
        start_point = bbox.origin_x, bbox.origin_y
        end_point = bbox.origin_x + bbox.width, bbox.origin_y + bbox.height
        cv2.rectangle(image, start_point, end_point, TEXT_COLOR, 3)

        # Draw label and score
        category = detection.categories[0]
        category_name = category.category_name
        probability = round(category.score, 2)
        result_text = category_name + ' (' + str(probability) + ')'
        text_location = (MARGIN + bbox.origin_x,
                         MARGIN + ROW_SIZE + bbox.origin_y)
        cv2.putText(image, result_text, text_location, cv2.FONT_HERSHEY_PLAIN,
                    FONT_SIZE, TEXT_COLOR, FONT_THICKNESS)
    return image

model_path = 'efficientdet_lite0.tflite'

BaseOptions = mp.tasks.BaseOptions
DetectionResult = mp.tasks.components.containers.DetectionResult
ObjectDetector = mp.tasks.vision.ObjectDetector
ObjectDetectorOptions = mp.tasks.vision.ObjectDetectorOptions
VisionRunningMode = mp.tasks.vision.RunningMode

def print_result(result: DetectionResult, output_image: mp.Image, timestamp_ms: int):
    print('detection result: {}'.format(result))

options = ObjectDetectorOptions(
    base_options=BaseOptions(model_asset_path=model_path),
    running_mode=VisionRunningMode.LIVE_STREAM,
    max_results=5,
    result_callback=print_result)

with ObjectDetector.create_from_options(options) as detector:
    cap = cv2.VideoCapture(0)
    while True:
        ret, frame = cap.read()
        if not ret:
            break
        mp_image = mp.Image(image_format=mp.ImageFormat.SRGB,
                            data=frame)
        frame_timestamp_ms = int(time.time() * 1000)
        detector.detect_async(mp_image, frame_timestamp_ms)
        # Removed
        # annotated_image = visualize(mp_image, DetectionResult)
        # cv2.imshow('Object Detection', annotated_image)
        # Added
        cv2.imshow('Object Detection', frame)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
    cap.release()
    cv2.destroyAllWindows()
Output:
(face_detection) [simon@simon dog]$ python main.py
2023-07-18 15:19:26.338527: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.
2023-07-18 15:19:26.474704: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.
2023-07-18 15:19:29.716547: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT
INFO: Created TensorFlow Lite XNNPACK delegate for CPU.
detection result: DetectionResult(detections=[Detection(bounding_box=BoundingBox(origin_x=9, origin_y=2, width=621, height=474), categories=[Category(index=None, score=0.8671875, display_name=None, category_name='person')], keypoints=[]), Detection(bounding_box=BoundingBox(origin_x=0, origin_y=225, width=251, height=247), categories=[Category(index=None, score=0.26171875, display_name=None, category_name='person')], keypoints=[]), Detection(bounding_box=BoundingBox(origin_x=485, origin_y=135, width=149, height=256), categories=[Category(index=None, score=0.23046875, display_name=None, category_name='couch')], keypoints=[]), Detection(bounding_box=BoundingBox(origin_x=0, origin_y=72, width=41, height=76), categories=[Category(index=None, score=0.14453125, display_name=None, category_name='book')], keypoints=[]), Detection(bounding_box=BoundingBox(origin_x=423, origin_y=287, width=35, height=51), categories=[Category(index=None, score=0.09765625, display_name=None, category_name='remote')], keypoints=[])])
detection result: DetectionResult(detections=[Detection(bounding_box=BoundingBox(origin_x=9, origin_y=2, width=621, height=474), categories=[Category(index=None, score=0.81640625, display_name=None, category_name='person')], keypoints=[]), Detection(bounding_box=BoundingBox(origin_x=491, origin_y=140, width=144, height=256), categories=[Category(index=None, score=0.3203125, display_name=None, category_name='couch')], keypoints=[]), Detection(bounding_box=BoundingBox(origin_x=0, origin_y=183, width=251, height=293), categories=[Category(index=None, score=0.24609375, display_name=None, category_name='person')], keypoints=[]), Detection(bounding_box=BoundingBox(origin_x=1, origin_y=284, width=92, height=191), categories=[Category(index=None, score=0.12109375, display_name=None, category_name='couch')], keypoints=[]), Detection(bounding_box=BoundingBox(origin_x=0, origin_y=74, width=41, height=74), categories=[Category(index=None, score=0.11328125, display_name=None, category_name='book')], keypoints=[])])
detection result: DetectionResult(detections=[Detection(bounding_box=BoundingBox(origin_x=3, origin_y=0, width=633, height=480), categories=[Category(index=None, score=0.85546875, display_name=None, category_name='person')], keypoints=[]), Detection(bounding_box=BoundingBox(origin_x=21, origin_y=218, width=355, height=256), categories=[Category(index=None, score=0.19921875, display_name=None, category_name='person')], keypoints=[]), Detection(bounding_box=BoundingBox(origin_x=512, origin_y=139, width=125, height=199), categories=[Category(index=None, score=0.1328125, display_name=None, category_name='couch')], keypoints=[]), Detection(bounding_box=BoundingBox(origin_x=0, origin_y=2, width=219, height=430), categories=[Category(index=None, score=0.1328125, display_name=None, category_name='person')], keypoints=[]), Detection(bounding_box=BoundingBox(origin_x=423, origin_y=289, width=37, height=51), categories=[Category(index=None, score=0.12109375, display_name=None, category_name='cell phone')], keypoints=[])])
detection result: DetectionResult(detections=[Detection(bounding_box=BoundingBox(origin_x=3, origin_y=0, width=633, height=480), categories=[Category(index=None, score=0.80078125, display_name=None, category_name='person')], keypoints=[]), Detection(bounding_box=BoundingBox(origin_x=490, origin_y=138, width=146, height=251), categories=[Category(index=None, score=0.24609375, display_name=None, category_name='couch')], keypoints=[]), Detection(bounding_box=BoundingBox(origin_x=0, origin_y=203, width=261, height=261), categories=[Category(index=None, score=0.19921875, display_name=None, category_name='dog')], keypoints=[]), Detection(bounding_box=BoundingBox(origin_x=0, origin_y=72, width=41, height=76), categories=[Category(index=None, score=0.15625, display_name=None, category_name='book')], keypoints=[]), Detection(bounding_box=BoundingBox(origin_x=423, origin_y=286, width=37, height=52), categories=[Category(index=None, score=0.07421875, display_name=None, category_name='remote')], keypoints=[])])
detection result: DetectionResult(detections=[Detection(bounding_box=BoundingBox(origin_x=3, origin_y=2, width=633, height=474), categories=[Category(index=None, score=0.84375, display_name=None, category_name='person')], keypoints=[]), Detection(bounding_box=BoundingBox(origin_x=0, origin_y=198, width=251, height=252), categories=[Category(index=None, score=0.2109375, display_name=None, category_name='person')], keypoints=[]), Detection(bounding_box=BoundingBox(origin_x=484, origin_y=130, width=152, height=246), categories=[Category(index=None, score=0.16796875, display_name=None, category_name='chair')], keypoints=[]), Detection(bounding_box=BoundingBox(origin_x=0, origin_y=71, width=42, height=77), categories=[Category(index=None, score=0.15625, display_name=None, category_name='book')], keypoints=[]), Detection(bounding_box=BoundingBox(origin_x=58, origin_y=311, width=564, height=155), categories=[Category(index=None, score=0.10546875, display_name=None, category_name='person')], keypoints=[])])
detection result: DetectionResult(detections=[Detection(bounding_box=BoundingBox(origin_x=3, origin_y=2, width=633, height=474), categories=[Category(index=None, score=0.76953125, display_name=None, category_name='person')], keypoints=[]), Detection(bounding_box=BoundingBox(origin_x=481, origin_y=138, width=158, height=299), categories=[Category(index=None, score=0.19921875, display_name=None, category_name='couch')], keypoints=[]), Detection(bounding_box=BoundingBox(origin_x=14, origin_y=225, width=256, height=242), categories=[Category(index=None, score=0.18359375, display_name=None, category_name='person')], keypoints=[]), Detection(bounding_box=BoundingBox(origin_x=0, origin_y=73, width=41, height=74), categories=[Category(index=None, score=0.15625, display_name=None, category_name='book')], keypoints=[]), Detection(bounding_box=BoundingBox(origin_x=2, origin_y=284, width=94, height=191), categories=[Category(index=None, score=0.08203125, display_name=None, category_name='person')], keypoints=[])])
detection result: DetectionResult(detections=[Detection(bounding_box=BoundingBox(origin_x=3, origin_y=2, width=633, height=474), categories=[Category(index=None, score=0.81640625, display_name=None, category_name='person')], keypoints=[]), Detection(bounding_box=BoundingBox(origin_x=0, origin_y=225, width=251, height=242), categories=[Category(index=None, score=0.26171875, display_name=None, category_name='person')], keypoints=[]), Detection(bounding_box=BoundingBox(origin_x=0, origin_y=69, width=42, height=82), categories=[Category(index=None, score=0.14453125, display_name=None, category_name='book')], keypoints=[]), Detection(bounding_box=BoundingBox(origin_x=477, origin_y=108, width=158, height=335), categories=[Category(index=None, score=0.1328125, display_name=None, category_name='couch')], keypoints=[]), Detection(bounding_box=BoundingBox(origin_x=26, origin_y=257, width=223, height=72), categories=[Category(index=None, score=0.09765625, display_name=None, category_name='person')], keypoints=[])])
detection result: DetectionResult(detections=[Detection(bounding_box=BoundingBox(origin_x=3, origin_y=2, width=633, height=474), categories=[Category(index=None, score=0.84375, display_name=None, category_name='person')], keypoints=[]), Detection(bounding_box=BoundingBox(origin_x=14, origin_y=223, width=355, height=247), categories=[Category(index=None, score=0.19921875, display_name=None, category_name='person')], keypoints=[]), Detection(bounding_box=BoundingBox(origin_x=472, origin_y=120, width=168, height=323), categories=[Category(index=None, score=0.18359375, display_name=None, category_name='couch')], keypoints=[]), Detection(bounding_box=BoundingBox(origin_x=0, origin_y=70, width=42, height=77), categories=[Category(index=None, score=0.15625, display_name=None, category_name='book')], keypoints=[]), Detection(bounding_box=BoundingBox(origin_x=2, origin_y=310, width=96, height=164), categories=[Category(index=None, score=0.08203125, display_name=None, category_name='couch')], keypoints=[])])
detection result: DetectionResult(detections=[Detection(bounding_box=BoundingBox(origin_x=3, origin_y=0, width=633, height=480), categories=[Category(index=None, score=0.8671875, display_name=None, category_name='person')], keypoints=[]), Detection(bounding_box=BoundingBox(origin_x=0, origin_y=177, width=251, height=293), categories=[Category(index=None, score=0.2109375, display_name=None, category_name='person')], keypoints=[]), Detection(bounding_box=BoundingBox(origin_x=0, origin_y=69, width=42, height=85), categories=[Category(index=None, score=0.14453125, display_name=None, category_name='book')], keypoints=[]), Detection(bounding_box=BoundingBox(origin_x=523, origin_y=136, width=116, height=207), categories=[Category(index=None, score=0.11328125, display_name=None, category_name='couch')], keypoints=[]), Detection(bounding_box=BoundingBox(origin_x=30, origin_y=313, width=552, height=164), categories=[Category(index=None, score=0.08984375, display_name=None, category_name='couch')], keypoints=[])])
detection result: DetectionResult(detections=[Detection(bounding_box=BoundingBox(origin_x=3, origin_y=2, width=633, height=474), categories=[Category(index=None, score=0.76953125, display_name=None, category_name='person')], keypoints=[]), Detection(bounding_box=BoundingBox(origin_x=473, origin_y=136, width=164, height=305), categories=[Category(index=None, score=0.24609375, display_name=None, category_name='couch')], keypoints=[]), Detection(bounding_box=BoundingBox(origin_x=0, origin_y=208, width=260, height=256), categories=[Category(index=None, score=0.16796875, display_name=None, category_name='person')], keypoints=[]), Detection(bounding_box=BoundingBox(origin_x=0, origin_y=68, width=42, height=80), categories=[Category(index=None, score=0.1328125, display_name=None, category_name='book')], keypoints=[]), Detection(bounding_box=BoundingBox(origin_x=0, origin_y=387, width=77, height=92), categories=[Category(index=None, score=0.06640625, display_name=None, category_name='bed')], keypoints=[])])
(face_detection) [simon@simon dog]$