How to use the webcam to capture an image and extract the information on it using python?

Question

I was trying to capture an image from the webcam and extract the text on it using Python.

Here is the code:

import cv2
import matplotlib.pyplot as plt
import numpy as np
import pytesseract

from PIL import Image
from pytesseract import image_to_string

# Tell pytesseract which Tesseract executable to run. The path below is a
# Windows install location; adjust it if Tesseract lives somewhere else.
pytesseract.pytesseract.tesseract_cmd = r'C:\Program Files\Tesseract-OCR\tesseract.exe'

def main():
    """Grab a single frame from the first webcam and print the text found in it.

    Raises:
        RuntimeError: if the camera cannot be opened or does not deliver a frame.
    """
    # Use the attached camera to capture images
    # 0 stands for the first one
    cap = cv2.VideoCapture(0)
    try:
        if not cap.isOpened():
            raise RuntimeError("Could not open the webcam (device 0).")

        ret, frame = cap.read()
        if not ret:
            # read() reports failure through its first return value; the frame
            # is unusable in that case.
            raise RuntimeError("The webcam did not return a frame.")
    finally:
        # The frame is already in memory, so the device can be released
        # before the (slow) OCR step and is released on errors as well.
        cap.release()

    # OpenCV delivers BGR; convert to RGB before handing the image to Tesseract.
    img1 = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

    text = pytesseract.image_to_string(img1)
    print(text)

# Run the capture only when the file is executed as a script, not on import.
if __name__ == "__main__":
    main()

The code didn't work. I watched a couple of videos on Youtube, and I saw that people typically use Image.open("image.jpg") to open an image that is located on the computer. But I need to capture the image from the webcam and extract the information on it. So that method won't work in my situation. Is there a way to combine these two methods? Like capture the image using cv2 and extract the information using pytesseract.image_to_string()?

score 2 · Answer 1 · answered Sep 29 '19 at 15:24

Can you please try replacing the line of code below,

text = pytesseract.image_to_string(img1)

With the code,

text = pytesseract.image_to_string(Image.fromarray(img1))

Or here is a working code snippet (I copied your code and updated it a little):

def main():
    """Step through webcam frames and print the text recognised in each one.

    Every frame is shown in a window that waits for a key press: 'q' quits,
    any other key prints the extracted text and moves on to the next frame.
    """
    # Use the attached camera to capture images
    # 0 stands for the first one
    cap = cv2.VideoCapture(0)
    try:
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                # No frame was delivered; stop instead of crashing in cvtColor.
                break

            # OCR works on an RGB copy; OpenCV itself delivers BGR.
            img1 = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            text = pytesseract.image_to_string(Image.fromarray(img1))

            # imshow expects BGR, so display the untouched frame rather than
            # the RGB copy (which would show red and blue swapped).
            cv2.imshow('frame', frame)

            # waitKey(0) blocks until a key is pressed.
            if cv2.waitKey(0) & 0xFF == ord('q'):
                return None
            print("Extracted Text: ", text)
    finally:
        # Runs on every exit path, including the early return above.
        cap.release()
        cv2.destroyAllWindows()

Hope This will help you.

I used a while loop because with the if condition I did not get a result; I am still trying to figure out why.

It is not quite extracting the text, I guess maybe I need to convert the image to black and white. Anyway, I appreciate the solution sir. — Leon S. Kennedy, Sep 29 '19 at 19:39
Welcome! I am also learning OpenCV. Can you please try cv2.COLOR_BGR2GRAY instead of cv2.COLOR_BGR2RGB in cv2.cvtColor()? — Sachin, Sep 30 '19 at 05:37

score 0 · Answer 2 · edited Oct 05 '21 at 06:10

import cv2
import pytesseract

# Live webcam OCR: draws a box and the recognised text around every word
# Tesseract finds in the current frame. Press 'q' in the window to quit.

# Raw string: backslashes are already literal, so they must not be doubled.
pytesseract.pytesseract.tesseract_cmd = r'C:\Program Files\Tesseract-OCR\tesseract.exe'
framewidth = 640
frameheight = 480

cap = cv2.VideoCapture(0)

# Named property ids instead of the magic numbers 3 and 4.
cap.set(cv2.CAP_PROP_FRAME_WIDTH, framewidth)
cap.set(cv2.CAP_PROP_FRAME_HEIGHT, frameheight)

try:
    while True:
        success, img = cap.read()
        if not success:
            # No frame delivered (camera missing, busy or unplugged).
            print("Could not read a frame from the camera.")
            break

        # OCR runs on an RGB copy; the BGR frame is kept for drawing and
        # display because cv2.imshow expects BGR.
        rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

        # Word-level detection. Alternatives:
        #   - single characters: pytesseract.image_to_boxes(rgb)
        #     (its y coordinates are measured from the bottom of the image)
        #   - digits only: pytesseract.image_to_data(
        #         rgb, config=r'--oem 3 --psm 6 outputbase digits')
        #   - plain text: pytesseract.image_to_string(rgb)
        boxes = pytesseract.image_to_data(rgb)

        for row_no, row in enumerate(boxes.splitlines()):
            if row_no == 0:
                continue  # first line is the column header

            fields = row.split()
            print(fields)

            # Only rows with all 12 columns carry recognised text (column 11);
            # columns 6-9 are left, top, width, height.
            if len(fields) == 12:
                left, top, width, height = (int(value) for value in fields[6:10])
                cv2.rectangle(img, (left, top), (left + width, top + height), (0, 0, 255), 3)
                cv2.putText(img, fields[11], (left, top), cv2.FONT_HERSHEY_COMPLEX, 1, (50, 100, 255), 2)

        cv2.imshow("video", img)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Always give the camera back and close the preview window.
    cap.release()
    cv2.destroyAllWindows()
# I don't know how to write up an answer, but this is my code and it works fine. Enjoy!

Your answer could be improved with additional supporting information. Please [edit] to add further details, such as citations or documentation, so that others can confirm that your answer is correct. You can find more information on how to write good answers [in the help center](/help/how-to-answer). — Community, Oct 05 '21 at 06:10

How to use the webcam to capture an image and extract the information on it using python?

2 Answers2