Detect digit on a live video camera using OpenCV and TensorFlow

Question

I tried the code below to detect digits in the live camera video, draw a contour around each one, and then classify it using the H5 model, but it gives bad results: the camera opens, but I see neither detection nor classification. I'm not sure what I need to change or work on.

I use Python 2.7, OpenCV 4.2.0 and TensorFlow 1.5.0.

The code I'm working with:

from statistics import mode
import cv2, time
from keras.models import load_model
from keras.datasets import mnist
import tensorflow as tf
import numpy as np
import vision_definitions
from PIL import Image
import numpy as np
import sys, os

from utils.inference import detect_digits
from utils.inference import draw_text
from utils.inference import draw_bounding_box
from utils.inference import apply_offsets
from utils.inference import load_detection_model
from utils.preprocessor import preprocess_input

# Live digit detection + classification demo.
#
# Grabs frames from the default camera, asks the detection model for digit
# bounding boxes, classifies every cropped digit with the Keras model and
# draws the box and the (temporally smoothed) label on the frame.
# Press 'q' in the preview window to quit.

# parameters for loading data and images
detection_model_path = '../trained_models/detection_models/model.sav'
class_model_path = '../trained_models/class_models/Num.h5'

# hyper-parameters for bounding boxes shape
frame_window = 10  # how many recent predictions are kept for label smoothing
class_offsets = (20, 40)

# loading models
digit_detection = load_detection_model(detection_model_path)
class_classifier = load_model(class_model_path)

# getting input model shapes for inference
# NOTE(review): input_shape[1:3] is passed straight to cv2.resize below, which
# takes (width, height); harmless for a square input -- confirm for this model.
class_target_size = class_classifier.input_shape[1:3]

class_window = []   # most recent predicted labels (at most frame_window)
class_window1 = []

# starting video streaming
cameraIndex = 0
# NOTE(review): resolution / colorSpace are not used by the OpenCV capture
# below; they are kept only so the module-level names stay available.
resolution = vision_definitions.kVGA
colorSpace = vision_definitions.kRGBColorSpace
resolution = 2
colorSpace = 3
cv2.namedWindow('window_frame')
video_capture = cv2.VideoCapture(cameraIndex)
if not video_capture.isOpened():
    # Without a camera every read() would hand None to cvtColor and crash.
    sys.exit('Could not open camera %d' % cameraIndex)

try:
    while True:
        rval, frame = video_capture.read()
        if not rval:
            # Camera unplugged or stream ended: stop instead of crashing.
            break

        gray_image = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
        rgb_image = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        digits = detect_digits(digit_detection, gray_image)

        for digit_coordinates in digits:
            x1, x2, y1, y2 = apply_offsets(digit_coordinates, class_offsets)
            gray_digit = gray_image[y1:y2, x1:x2]
            try:
                gray_digit = cv2.resize(gray_digit, (class_target_size))
            except cv2.error:
                # Empty crop (box partly outside the frame): skip this box.
                continue

            # Project helper; presumably normalises pixel values -- TODO confirm.
            gray_digit = preprocess_input(gray_digit, True)
            # Add the batch axis in front and the channel axis at the end.
            gray_digit = np.expand_dims(gray_digit, 0)
            gray_digit = np.expand_dims(gray_digit, -1)
            class_prediction = class_classifier.predict(gray_digit)
            class_probability = np.max(class_prediction)
            class_label_arg = np.argmax(class_prediction)

            # Smooth the label over the last frame_window predictions.
            # NOTE(review): the window is shared by every detection in the
            # frame, so smoothing is only meaningful with one digit in view.
            class_text = str(class_label_arg)
            class_window.append(class_text)
            if len(class_window) > frame_window:
                class_window.pop(0)
            try:
                class_mode = mode(class_window)
            except ValueError:
                # statistics.StatisticsError (a ValueError) is raised on a tie
                # by older Python versions; fall back to the current label.
                class_mode = class_text

            # Green box whose brightness follows the classifier confidence.
            color = class_probability * np.asarray((0, 255, 0))
            color = color.astype(int)
            color = color.tolist()

            draw_bounding_box(digit_coordinates, rgb_image, color)
            draw_text(digit_coordinates, rgb_image, class_mode,
                      color, 0, -45, 1, 1)

        # Drawing happened on the RGB copy; convert back for cv2.imshow.
        frame = cv2.cvtColor(rgb_image, cv2.COLOR_RGB2BGR)
        cv2.imshow('window_frame', frame)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Always give the camera and the preview window back to the OS.
    video_capture.release()
    cv2.destroyAllWindows()

score 0 · Answer 1 · edited Mar 16 '23 at 20:48

I spent some time on this, since there is no cv2.imshow() on Windows except in C++, but here it is ...

[ Sample ]:

import cv2
import matplotlib.pyplot as plt
import matplotlib.animation as animation

import numpy as np

import tensorflow as tf

import os
from os.path import exists

import time

def f1( picture ):
    """Return *picture* as a NumPy array (converted with ``np.asarray``)."""
    as_array = np.asarray( picture )
    return as_array

# Figure and image artist that the animation callback updates in place.
fig = plt.figure()
# Blank placeholder shown until the first camera frame arrives. A generated
# array is used instead of a file on disk so the sample runs on any machine.
image = np.zeros( ( 480, 640, 3 ), dtype=np.uint8 )
im = plt.imshow( image )

# Camera handles (module-level names, so no `global` statement is needed).
video_capture_0 = cv2.VideoCapture(0)
# NOTE(review): the second camera is opened but not read anywhere in this sample.
video_capture_1 = cv2.VideoCapture(1)

def animate(i ):
    """Animation callback: show the newest camera frame in the image artist.

    Reads one frame from ``video_capture_0``, reorders its first three
    channels from OpenCV's BGR order to RGB and pushes it into ``im``.
    If the read fails the artist is left unchanged.

    Args:
        i: Frame counter supplied by ``FuncAnimation``; not used.

    Returns:
        A 1-tuple holding the image artist, i.e. the iterable of redrawn
        artists that ``FuncAnimation`` expects when blitting is enabled.
    """
    ret0, frame0 = video_capture_0.read()
    if ret0:
        # Channels 2, 1, 0 -> RGB. Slicing works for any frame size, unlike a
        # reshape to a fixed 480x640 resolution.
        picture = frame0[:, :, 2::-1]

        im.set_array( f1( picture ) )
    return im,
    
# Build the animation once and keep a reference to it so it is not
# garbage-collected while the window is open. plt.show() normally blocks until
# the window is closed, so no surrounding loop is needed.
ani = animation.FuncAnimation(fig, animate, interval=50, blit=True)
try:
    plt.show()
finally:
    # When everything is done, release the captures
    video_capture_0.release()
    video_capture_1.release()
    cv2.destroyAllWindows()

[ Model ]:

"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
: Model Initialize
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
# Sequential CNN over 29x39 RGB inputs, ending in a 2-unit dense layer.
# Every layer is listed in order in a single constructor call.
model = tf.keras.models.Sequential([
    tf.keras.layers.InputLayer(input_shape=( 29, 39, 3 )),
    # Disabled alternative (recurrent front end):
    # tf.keras.layers.Reshape(( 29, 39 * 3 )),
    # tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(128, return_sequences=True, return_state=False)),

    # Random augmentation layers.
    tf.keras.layers.RandomFlip("horizontal_and_vertical"),
    tf.keras.layers.RandomRotation(0.2),
    tf.keras.layers.RandomZoom(.5, .2),

    # Convolution / pooling stack, with Dense layers between the last convolutions.
    tf.keras.layers.Conv2D(32, (3, 3), activation='relu'),
    tf.keras.layers.MaxPooling2D((2, 2)),
    tf.keras.layers.Conv2D(64, (3, 3), activation='relu'),
    tf.keras.layers.MaxPooling2D((2, 2)),
    tf.keras.layers.Conv2D(64, (3, 3), activation='relu'),
    tf.keras.layers.Dense(64),
    tf.keras.layers.Conv2D(64, (3, 3), activation='relu'),
    tf.keras.layers.Dense(64),

    # Head: flatten, one hidden dense layer, then the 2-unit output layer.
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(64),
    tf.keras.layers.Dense(2),
])

# Print the layer-by-layer summary.
model.summary()

[ Output ]:

Sample

Sample 2

The problem is not how to use OpenCV on Windows, but how I can make correct predictions from the models, i.e. detect the digits and classify them. — nada nada, Apr 16 '22 at 21:59
That is because the code needs a camera interface implementation, and that is the first example. To detect digits you can use the algorithms OpenCV provides or build your own model; you can find examples on the website. — Jirayu Kaewprateep, Apr 18 '22 at 02:59
How should I proceed? I have no idea, Sir. I thought of modifying the XML file that is responsible for the detection, so I replaced it with an H5 model, but I get this error: File "cnn.py", line 68, in digits = digit_detection.detectMultiScale(gray_image, 1.3, 10) AttributeError: 'Sequential' object has no attribute 'detectMultiScale' — nada nada, Apr 18 '22 at 06:03

Detect digit on a live video camera using OpenCV and TensorFlow

1 Answer