
I'm making a sign language interpreter GUI with a MediaPipe model trained on an ASL dataset from Kaggle. This is my first time working in Tkinter, and I'm trying to take the result from the print_result callback and display it in the result_label label in my Tkinter window. I created a result_string variable and a get_result function to update that variable and then update the label's text, but I'm getting this error:

WARNING: Logging before InitGoogleLogging() is written to STDERR
AttributeError: type object 'GestureRecognizerResult' has no attribute 'gestures'

I'm using a .task file from my MediaPipe model, imported into my workspace.

UPDATED START FUNCTION:

def start(self):
    # Get a frame from the video source
    #self.Signing = True
    while True:
        ret, frame = self.vid.get_frame()

        if ret:
            #cv2.imwrite("frame-" + time.strftime("%d-%m-%Y-%H-%M-%S") + ".jpg", cv2.cvtColor(frame, cv2.COLOR_RGB2BGR))
            SIZE = 64   #USING THE SNAPSHOT, RESIZING AND PUTTING THROUGH MODEL
            image = frame
            img = cv2.resize(image, (SIZE, SIZE))
            flip = cv2.flip(img, 1)
            mp_image = mp.Image(image_format=mp.ImageFormat.SRGB, data=flip)
            result_string = " "

            images = []
            results = []
            BaseOptions = mp.tasks.BaseOptions
            GestureRecognizer = mp.tasks.vision.GestureRecognizer
            GestureRecognizerOptions = mp.tasks.vision.GestureRecognizerOptions
            GestureRecognizerResult = mp.tasks.vision.GestureRecognizerResult
            VisionRunningMode = mp.tasks.vision.RunningMode

            def print_result(result: GestureRecognizerResult, output_image: mp.Image, timestamp_ms: int):
                for gesture_category in result.gestures[0]:
                    category_name = gesture_category.category_name
                    return_string = category_name
                return result_string


        
            # Create a gesture recognizer instance with the live stream mode:
            options = GestureRecognizerOptions(
                base_options=BaseOptions(model_asset_path='C:\\Users\\HP\\Desktop\\asl_gui\\gesture_recognizer.task'),
                running_mode=VisionRunningMode.LIVE_STREAM,
                result_callback=print_result)
            with GestureRecognizer.create_from_options(options) as recognizer:
                # STEP 3: Recognize gestures in the input image.
                recognition_result = recognizer.recognize_async(mp_image, 100000)


                print(format(result_string)) #just to check

            if result_string:
                self.result_label.config(text=result_string)
            else:
                self.result_label.config(text="No gesture detected")

        time.sleep(2)


COMPLETE CODE WITH OLD START FUNCTION:


import tkinter
import cv2
import PIL.Image, PIL.ImageTk
import time
import matplotlib as plt
import mediapipe as mp
from mediapipe.tasks.python import vision
from mediapipe.tasks.python import text 
from mediapipe.tasks import python
from mediapipe.framework.formats import landmark_pb2

class App:
     def __init__(self, window, window_title, video_source=0):
        self.window = window
        self.window.title(window_title)
        frame = tkinter.Frame(master=window,bg="skyblue",padx=10)
        frame.pack()
        self.video_source = video_source
        #self.Signing = False
 
         # open video source (by default this will try to open the computer webcam)
        self.vid = MyVideoCapture(self.video_source)
 
         # Create a canvas that can fit the above video source size
        self.canvas = tkinter.Canvas(window, width = self.vid.width, height = self.vid.height)
        self.canvas.pack()
 
         # Button that lets the user take a snapshot
        self.btn_start=tkinter.Button(window, text="Start Signing", width=50, command=self.start,state=tkinter.NORMAL)
        
        self.btn_stop=tkinter.Button(window,text="End Message",width=50,command=self.stop)
        

        self.result_label = tkinter.Label(window,text="Result: ",font=('Calibri 15 bold'))
      
        self.result_label.pack(anchor=tkinter.CENTER,expand=True)
        self.btn_start.pack(anchor=tkinter.CENTER, expand=True)
        self.btn_stop.pack(anchor=tkinter.CENTER, expand=True)

         # After it is called once, the update method will be automatically called every delay milliseconds
        self.delay = 60
        self.update()
        self.window.mainloop()
 
     def start(self):
        # Get a frame from the video source
        #self.Signing = True
        while True:
            ret, frame = self.vid.get_frame()

            if ret:
                #cv2.imwrite("frame-" + time.strftime("%d-%m-%Y-%H-%M-%S") + ".jpg", cv2.cvtColor(frame, cv2.COLOR_RGB2BGR))
                SIZE = 64   #USING THE SNAPSHOT, RESIZING AND PUTTING THROUGH MODEL
                image = frame
                img = cv2.resize(image, (SIZE, SIZE))
                flip = cv2.flip(img, 1)
                mp_image = mp.Image(image_format=mp.ImageFormat.SRGB, data=flip)
                result_string = " "

                images = []
                results = []
                BaseOptions = mp.tasks.BaseOptions
                GestureRecognizer = mp.tasks.vision.GestureRecognizer
                GestureRecognizerOptions = mp.tasks.vision.GestureRecognizerOptions
                GestureRecognizerResult = mp.tasks.vision.GestureRecognizerResult
                VisionRunningMode = mp.tasks.vision.RunningMode

                def print_result(result: GestureRecognizerResult, output_image: mp.Image, timestamp_ms: int):
                    for gesture_category in result.gestures[0]:
                        category_name = gesture_category.category_name
                        return_string = category_name
                        print(format(category_name))

                def get_result(result: GestureRecognizerResult, output_image: mp.Image, timestamp_ms: int):
                    for gesture_category in result.gestures[0]:
                        category_name = gesture_category.category_name
                        result_string = category_name
            

                # Create a gesture recognizer instance with the live stream mode:
                options = GestureRecognizerOptions(
                    base_options=BaseOptions(model_asset_path='C:\\Users\\HP\\Desktop\\asl_gui\\gesture_recognizer.task'),
                    running_mode=VisionRunningMode.LIVE_STREAM,result_callback=print_result)
                with GestureRecognizer.create_from_options(options) as recognizer:
                    # STEP 3: Recognize gestures in the input image.
                    recognition_result = recognizer.recognize_async(mp_image, 100000)

                    get_result(GestureRecognizerResult,mp_image,100000)

                    if result_string:
                        category_name = result_string[0].category_name
                        self.result_label.config(text=category_name)
                    else:
                        self.result_label.config(text="No gesture detected")

            time.sleep(2)

        

     def stop(self):
            self.btn_start['state'] = tkinter.DISABLED

            
     def update(self):
         # Get a frame from the video source
        ret, frame = self.vid.get_frame()
 
        if ret:
            self.photo = PIL.ImageTk.PhotoImage(image = PIL.Image.fromarray(frame))
            self.canvas.create_image(0, 0, image = self.photo, anchor = tkinter.NW)
 
        self.window.after(self.delay, self.update)
    
       
              
 
class MyVideoCapture:
    def __init__(self, video_source=0):
        # Open the video source
        self.vid = cv2.VideoCapture(video_source)
        if not self.vid.isOpened():
            raise ValueError("Unable to open video source", video_source)
 
         # Get video source width and height
        self.width = self.vid.get(cv2.CAP_PROP_FRAME_WIDTH)
        self.height = self.vid.get(cv2.CAP_PROP_FRAME_HEIGHT)
 
    def get_frame(self):
        if self.vid.isOpened():
            ret, frame = self.vid.read()
            if ret:
                # Return a boolean success flag and the current frame converted to BGR
                return (ret, cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
            else:
                 return (ret, None)
        else:
            return (ret, None)
 
     # Release the video source when the object is destroyed
    def __del__(self):
        if self.vid.isOpened():
            self.vid.release()

    
 #Create a window and pass it to the Application object
App(tkinter.Tk(), "ASL Interpreter")
Mallick
  • Why do you have `import cv2` duplicated? – toyota Supra May 23 '23 at 01:55
  • I think `get_result(GestureRecognizerResult,mp_image,100000)` should be `get_result(recognition_result,mp_image,100000)` instead. – acw1668 May 23 '23 at 03:22
  • @acw1668 It doesn't work, because it says recognition_result is a NoneType object with no attribute gestures. Also, GestureRecognizerResult is this: GestureRecognizerResult(gestures: List[List[category_module.Category]], handedness: List[List[category_module.Category]], hand_landmarks: List[List[landmark_module.NormalizedLandmark]], hand_world_landmarks: List[List[landmark_module.Landmark]]) – Mallick May 23 '23 at 10:15
  • @toyotaSupra My bad, fixed it. It didn't change anything. – Mallick May 23 '23 at 10:16
  • Then the function `recognizer.recognize_async(...)` return `None`. Note that `GestureRecognizerResult` is the class name, you cannot use it to access instance variables. – acw1668 May 23 '23 at 10:19
  • @acw1668 I removed the get_result function and changed print_result to update the value of the global result_string variable, and it's not printing anything now. Just a blank at every interval when a photo is taken (in the terminal, not the GUI window). – Mallick May 23 '23 at 11:01
  • `print_result()` does not update `result_string`. – acw1668 May 23 '23 at 11:07
  • @acw1668 Yes, I just came to that conclusion. How would you recommend I update result_string? – Mallick May 23 '23 at 11:10

1 Answer


Try out this modified script; make sure to change the path to gesture_recognizer.task:

import tkinter
import cv2
import PIL.Image, PIL.ImageTk
import mediapipe as mp


class App:
    def __init__(self, window, window_title, video_source=0):
        self.window = window
        self.window.title(window_title)
        frame = tkinter.Frame(master=window,bg="skyblue",padx=10)
        frame.pack()
        self.video_source = video_source
        self.is_recognition_enabled = False
        self.recognized_gesture = None
        # open video source (by default this will try to open the computer webcam)
        self.vid = MyVideoCapture(self.video_source)
 
        # Create a canvas that can fit the above video source size
        self.canvas = tkinter.Canvas(window, width = self.vid.width, height = self.vid.height)
        self.canvas.pack()
 
        # Button that lets the user take a snapshot
        self.btn_start=tkinter.Button(window, text="Start Signing", width=50, command=self.start,state=tkinter.NORMAL)
        
        self.btn_stop=tkinter.Button(window,text="End Message",width=50,command=self.stop)
        self.btn_stop["state"] = tkinter.DISABLED

        self.result_label = tkinter.Label(window,text="Result: ",font=('Calibri 15 bold'))
        self.result_label.pack(anchor=tkinter.CENTER,expand=True)

        self.btn_start.pack(anchor=tkinter.CENTER, expand=True)
        self.btn_stop.pack(anchor=tkinter.CENTER, expand=True)

        self.timestamp = 0
        self.__create_recognizer()

        # After it is called once, the update method will be automatically called every delay milliseconds
        self.delay = 60
        self.update()
        self.window.mainloop()
 
    def start(self):
        self.btn_start["state"] = tkinter.DISABLED
        self.btn_stop["state"] = tkinter.NORMAL
        self.is_recognition_enabled = True

    def result_callback(self, result, output_image, timestamp_ms):
        first_gesture = "No gestures"
        if len(result.gestures) > 0:
            first_gesture = "Category: " + result.gestures[0][0].category_name
            print(f"First recognized gesture: {first_gesture}")
        self.recognized_gesture = first_gesture

    def __create_recognizer(self): # recognizer should be created only once
        BaseOptions = mp.tasks.BaseOptions
        GestureRecognizer = mp.tasks.vision.GestureRecognizer
        GestureRecognizerOptions = mp.tasks.vision.GestureRecognizerOptions
        VisionRunningMode = mp.tasks.vision.RunningMode
        # Create a gesture recognizer instance with the live stream mode:
        options = GestureRecognizerOptions(
                base_options=BaseOptions(model_asset_path='gesture_recognizer.task'),
                running_mode=VisionRunningMode.LIVE_STREAM,
                result_callback=self.result_callback)
        self.recognizer = GestureRecognizer.create_from_options(options)

    def recognize(self, img):
        SIZE = 64   #USING THE SNAPSHOT, RESIZING AND PUTTING THROUGH MODEL
        img = cv2.resize(img, (SIZE, SIZE))
        flip = cv2.flip(img, 1)
        mp_image = mp.Image(image_format=mp.ImageFormat.SRGB, data=flip)
        # STEP 3: Recognize gestures in the input image.
        self.recognizer.recognize_async(mp_image, self.timestamp) # returns None
        # timestamp must be *always* monotonically increasing (otherwise exception)
        # this is required due to LIVE_STREAM mode
        self.timestamp = self.timestamp + 1 

    def stop(self):
        self.is_recognition_enabled = False
        self.btn_stop["state"] = tkinter.DISABLED
        self.btn_start["state"] = tkinter.NORMAL
        self.result_label.config(text="Stopped")
            
    def update(self):
        # Get a frame from the video source
        ret, frame = self.vid.get_frame()
        if ret:
            self.photo = PIL.ImageTk.PhotoImage(image = PIL.Image.fromarray(frame))
            self.canvas.create_image(0, 0, image = self.photo, anchor = tkinter.NW)
 
            if self.is_recognition_enabled:
                self.recognize(frame)   
                self.result_label.config(text=self.recognized_gesture)

        self.window.after(self.delay, self.update)
    

class MyVideoCapture:
    def __init__(self, video_source=0):
        # Open the video source
        self.vid = cv2.VideoCapture(video_source)
        if not self.vid.isOpened():
            raise ValueError("Unable to open video source", video_source)
 
        # Get video source width and height
        self.width = self.vid.get(cv2.CAP_PROP_FRAME_WIDTH)
        self.height = self.vid.get(cv2.CAP_PROP_FRAME_HEIGHT)
 
    def get_frame(self):
        if self.vid.isOpened():
            ret, frame = self.vid.read()
            if ret:
                # Return a boolean success flag and the current frame converted to BGR
                return (ret, cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
            else:
                return (ret, None)
        else:
            return (ret, None)
 
    # Release the video source when the object is destroyed
    def __del__(self):
        if self.vid.isOpened():
            self.vid.release()

    
#Create a window and pass it to the Application object
App(tkinter.Tk(), "ASL Interpreter")

Please note:

  • GestureRecognizer should not be recreated for each frame; otherwise, it completely erases the benefit of the LIVE_STREAM mode.
  • Instead of running the recognition in the btn_start handler, it should run from update().
  • It is not possible to update the Label directly from result_callback (probably some tkinter-related threading specifics), which is why I hand the result over through self.recognized_gesture; a queue-based sketch of the same hand-off follows this list.
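
For reference, here is a minimal sketch of that hand-off done with a queue.Queue instead of a plain attribute; queue.Queue is Python's standard thread-safe channel. The method names match the script above, but self.result_queue is an assumed addition that does not exist in the script as posted:

import queue

# Assumed addition in App.__init__:
#     self.result_queue = queue.Queue()

def result_callback(self, result, output_image, timestamp_ms):
    # Runs on MediaPipe's worker thread: only enqueue data here,
    # never touch tkinter widgets directly.
    first_gesture = "No gestures"
    if len(result.gestures) > 0:
        first_gesture = "Category: " + result.gestures[0][0].category_name
    self.result_queue.put(first_gesture)

def update(self):
    # Runs on the Tk main loop: drain any pending results and update the
    # label safely, then reschedule (video drawing omitted for brevity).
    try:
        while True:
            self.result_label.config(text=self.result_queue.get_nowait())
    except queue.Empty:
        pass
    self.window.after(self.delay, self.update)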

I additionally took care of button (de)activation, just to make it a little more user-friendly.

You can see a short demo here:

demo

The output is not very stable; however, that is an issue of the gesture_recognizer, not of this script. Also, take a look at another answer where I also handle potential race conditions using a Lock.
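
Roughly, that Lock-based variant looks like the following sketch (not a verbatim copy of the linked answer; the attributes shown in __init__ are assumptions):

import threading

# Assumed additions in App.__init__:
#     self.lock = threading.Lock()
#     self.recognized_gesture = "No gestures"

def result_callback(self, result, output_image, timestamp_ms):
    first_gesture = "No gestures"
    if len(result.gestures) > 0:
        first_gesture = "Category: " + result.gestures[0][0].category_name
    with self.lock:  # writer side: MediaPipe's worker thread
        self.recognized_gesture = first_gesture

def update(self):
    # Reader side: the Tk main loop (video drawing omitted for brevity).
    with self.lock:
        text = self.recognized_gesture
    self.result_label.config(text=text)
    self.window.after(self.delay, self.update)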

Stefan
  • @Mallick If the `gesture_recognizer` does not work stable enough, you can try to use the following pretrained TensorFlow network from [Kazuhito00's repo](https://github.com/Kazuhito00/hand-gesture-recognition-using-mediapipe), which provides stable results. You can also find an example of using this network in combination with MediaPipe [another repo](https://github.com/anion0278/dms_perception/blob/bbdb9176b5009cfe6dfb6f3e7696b0a8004444cf/jetson_camera_node/src/mediapipe_node/recognizer.py#LL19C17-L19C17). – Stefan May 25 '23 at 13:13