1

I wrote a code for pose estimation using OpenCV and mediapipe library. The program was working well and I was getting around 30-35 fps. When I tried to convert the same program to a module so that I can use it easily in future for different projects, the fps of the new code(module) reduced drastically to 3-4 fps. My original Program:

import cv2
import mediapipe as mp
import time

cap = cv2.VideoCapture(1)
pTime = 0
cTime = 0

mpDraw = mp.solutions.drawing_utils
mpPose = mp.solutions.pose
pose = mpPose.Pose()  
   
while True:
    success, img1 = cap.read()
    img = cv2.flip(img1, 1)  

    imgRGB = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    results = pose.process(imgRGB)

    if results.pose_landmarks:
        mpDraw.draw_landmarks(img, results.pose_landmarks, mpPose.POSE_CONNECTIONS)

        for id, lm in enumerate(results.pose_landmarks.landmark):
            h, w, c = img.shape
            cx, cy = int(lm.x*w), int(lm.y*h)
            cv2.circle(img, (cx, cy), 5, (255, 0, 0), cv2.FILLED)
        
    cTime = time.time()
    fps = 1/(cTime - pTime)
    pTime = cTime

    cv2.putText(img, "FPS : " + str(int(fps)), (10, 50), cv2.FONT_HERSHEY_COMPLEX, 1, (255, 0, 8), 2)

    cv2.imshow("Live Feed", img)
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

My attempt at converting it into a module :

import cv2
import mediapipe as mp
import time

class poseDetector():
    def __init__(self, mode=False, upBody=False, smooth=True, detectionCon = 0.5, trackingCon=0.5):
        self.mode = mode
        self.upBody = upBody
        self.smooth = smooth
        self.detectionCon = detectionCon
        self.trackingCon = trackingCon

        self.mpDraw = mp.solutions.drawing_utils
        self.mpPose = mp.solutions.pose
        self.pose =self.mpPose.Pose(self.mode, self.upBody, self.smooth, self.detectionCon, self.trackingCon)         

    def findPose(self, img, draw=True):
        imgRGB = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        self.results = self.pose.process(imgRGB)

        if self.results.pose_landmarks:
            if draw:
                 self.mpDraw.draw_landmarks(img, self.results.pose_landmarks, self.mpPose.POSE_CONNECTIONS)
    return img

    def findPosition(self, img, draw=True):
        lmList = []
        if self.results.pose_landmarks:
            for id, lm in enumerate(self.results.pose_landmarks.landmark):
                h, w, c = img.shape
                cx, cy = int(lm.x*w), int(lm.y*h)
                lmList.append([id, cx, cy])
                if draw:
                    cv2.circle(img, (cx, cy), 5, (255, 0, 0), cv2.FILLED)
        return lmList

def main():
    cap = cv2.VideoCapture(1)
    pTime = 0
    cTime = 0

    while True:
        success, img1 = cap.read()
        img = cv2.flip(img1, 1)

        detector = poseDetector()
        img = detector.findPose(img) 
        lmList = detector.findPosition(img)

        cTime = time.time()
        fps = 1/(cTime - pTime)
        pTime = cTime

        cv2.putText(img, "FPS : " + str(int(fps)), (10, 50), cv2.FONT_HERSHEY_COMPLEX, 1, (255, 0, 8), 2)

        cv2.imshow("Live Feed", img)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break 

if __name__ == '__main__':
    main()

According to me , both the code should have been working in the same manner, but they are not. Can anyone tell where am I making mistake ?

Falcon
  • 73
  • 6
  • you should work on a "minimal reproducible example" for your post. this question is not generally useful because it amounts to simple programming mistakes that should have been found with debugging or at most profiling. – Christoph Rackwitz Jun 27 '21 at 21:03

1 Answers1

0

You need to place detector = poseDetector() to be before the while True::

detector = poseDetector()

while True:
    success, img1 = cap.read()
    ...

Your "module" implementation creates a new poseDetector object every iteration of the main loop.
Each execution of detector = poseDetector() includes a call to poseDetector.__init__ that calls self.pose =self.mpPose.Pose...
There is a lot of overhead...

while True:
    success, img1 = cap.read()
    img = cv2.flip(img1, 1)

    detector = poseDetector()
    ...

In your original ("non-module") implementation, you are executing pose = mpPose.Pose() only once (before the loop).

pose = mpPose.Pose()  
   
while True:
    success, img1 = cap.read()
    ...

I have tested your code before and after moving detector = poseDetector() outside the loop.
After moving the line above the loop, the frame rate is the same as the "non-module" implementation.

Rotem
  • 30,366
  • 4
  • 32
  • 65