pytesseract detects the wrong integer values

Question

I'm trying to detect the numbers found in my squares, and I thought I could use the library pytesseract, but for some reason I read the wrong values.

This is the console output:

And here are all my pictures (they are separate images; they are combined here only to show them all)

import numpy as np 
import cv2 
import re
from PIL import Image
import pytesseract
# Path of the Tesseract executable that pytesseract should call
pytesseract.pytesseract.tesseract_cmd = r'C:\Program Files\Tesseract-OCR\tesseract'


img = cv2.imread('gulRecNum.jpg')
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

# convert to HSV, since red and yellow are the lowest hue colors and come before green
hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)

# create a binary thresholded image on hue between red and yellow
lower = (0,240,160)
upper = (30,255,255)
thresh = cv2.inRange(hsv, lower, upper)

# apply morphology: open first, then close the result of the opening.
# The close has to run on `clean`; running it on `thresh` again would
# silently throw the opening step away.
kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (9,9))
clean = cv2.morphologyEx(thresh, cv2.MORPH_OPEN, kernel)
kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (15,15))
clean = cv2.morphologyEx(clean, cv2.MORPH_CLOSE, kernel)

# get external contours
# (findContours returns 2 or 3 values depending on the OpenCV version)
contours = cv2.findContours(clean, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
contours = contours[0] if len(contours) == 2 else contours[1]

result1 = img.copy()
result2 = img.copy()

mask = np.zeros(result2.shape, dtype=np.uint8)
# Otsu-thresholded (inverted) grayscale image; the digit crops are cut from it
thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)[1]
ROI_number = 0

for c in contours:
    cv2.drawContours(result1,[c],0,(0,0,0),2)
    # get rotated rectangle from contour
    rot_rect = cv2.minAreaRect(c)
    box = cv2.boxPoints(rot_rect)
    # np.int0 was removed in NumPy 2.0; np.intp is the type it aliased
    box = box.astype(np.intp)
    # draw rotated rectangle on copy of img
    cv2.drawContours(result2,[box],0,(0,0,0),2)
    # Only process the component if its area is greater than 1.
    areal = cv2.contourArea(c)
    if areal > 1:
        # get the center of mass
        M = cv2.moments(c)
        cx = int(M['m10']/M['m00'])
        cy = int(M['m01']/M['m00'])
        center = (cx, cy)
        print("\nx: ",cx,"\ny: ",cy)
        color = (0, 0, 255)

        cv2.circle(result2, center, 3, color, -1)
        cv2.putText(result2, "center", (cx - 10, cy - 20),
                    cv2.FONT_HERSHEY_SIMPLEX, 1.2, color, 2)

        # Cut the axis-aligned bounding box of the contour out of the
        # thresholded image, invert it again and save it as the OCR input
        x,y,w,h = cv2.boundingRect(c)
        ROI = 255 - thresh[y:y+h, x:x+w]
        cv2.drawContours(mask, [c], -1, (255,255,255), -1)
        cv2.imwrite('ROI_{}.png'.format(ROI_number), ROI)

        # Tesseract is restricted to the characters 0-9 here
        Number = pytesseract.image_to_string(ROI, config='--psm 13 --oem 3 -c tessedit_char_whitelist=0123456789')
        print("Number ", Number)

        ROI_number += 1

# save result
cv2.imwrite("4cubes_result2.png",result2)

# display result
imS = cv2.resize(result2, (600, 400))
cv2.imshow("result2", imS)

cv2.waitKey(0)
cv2.destroyAllWindows()

I thought I could write `Number = pytesseract.image_to_string(ROI, config='--psm 13 --oem 3 -c tessedit_char_whitelist=0123456789')` followed by `print(Number)` and get the number from the image, but the values I get are wrong. Why is that?

EDIT NEW ERROR

How do I solve it with this picture?

from PIL import Image
from operator import itemgetter
import numpy as np 
import easyocr
import cv2 
import re
import imutils
import pytesseract
# Path of the Tesseract executable that pytesseract should call
pytesseract.pytesseract.tesseract_cmd = r'C:\Program Files\Tesseract-OCR\tesseract'
# EasyOCR reader shared by every readtext() call in getNumber()
reader = easyocr.Reader(['ch_sim','en']) # need to run only once to load model into memory

# Detected cubes; the main loop appends one [cx, cy, number] entry per cube
Cubes = []

def getNumber(ROI):
    """Read the digit shown in a saved ROI image with EasyOCR.

    The image is thresholded at 127 and every contour whose area exceeds a
    tenth of the image area is cropped to its minimum-area rectangle.  The
    crop is passed to the module-level ``reader``; if the text that comes
    back is not a digit, the crop is rotated clockwise in 90 degree steps
    (at most three times) and read again.

    Args:
        ROI: Path of the image file to read.

    Returns:
        The text of the first OCR detection.  This is a digit string when
        one of the orientations produced one; otherwise it is the last text
        that was read, or ``None`` when OCR never returned anything (or no
        contour was large enough).
    """
    img = cv2.imread(ROI)
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    _, thresh = cv2.threshold(gray, 127, 255, 0)

    # findContours returns (image, contours, hierarchy) on OpenCV 3 and
    # (contours, hierarchy) otherwise; the last two items are always right.
    contours, _ = cv2.findContours(
        thresh, cv2.RETR_CCOMP, cv2.CHAIN_APPROX_SIMPLE)[-2:]

    # only look at contours that cover more than 10% of the image
    lp_area = (img.shape[0] * img.shape[1]) / 10

    # Stays None if nothing is ever read, instead of being left unbound
    predicted_digit = None

    for cnt in contours:
        if cv2.contourArea(cnt) <= lp_area:
            continue

        # Minimum-area (rotated) rectangle around the contour.
        # np.int0 was removed in NumPy 2.0; np.intp is the type it aliased.
        rect = cv2.minAreaRect(cnt)
        box = cv2.boxPoints(rect).astype(np.intp)

        ## Correct orientation and crop
        # Link, https://jdhao.github.io/2019/02/23/crop_rotated_rectangle_opencv/
        width = int(rect[1][0])
        height = int(rect[1][1])
        src_pts = box.astype("float32")
        dst_pts = np.array([[0, height-1],
                            [0, 0],
                            [width-1, 0],
                            [width-1, height-1]], dtype="float32")
        M = cv2.getPerspectiveTransform(src_pts, dst_pts)
        warped = cv2.warpPerspective(img, M, (width, height))

        # Run OCR on the crop as-is, then on the 90/180/270 degree rotations,
        # stopping at the first orientation that yields a digit.
        candidate = warped
        for attempt in range(4):
            if attempt:
                # cv2.cv2 no longer exists in current opencv-python builds
                candidate = cv2.rotate(candidate, cv2.ROTATE_90_CLOCKWISE)
            result = reader.readtext(candidate)
            if attempt == 0:
                print(result)
            if not result:
                # Nothing detected in this orientation: indexing result[0]
                # would raise IndexError, so just try the next rotation.
                continue
            predicted_digit = result[0][1]
            if predicted_digit.isdigit():
                title = ("warped " if attempt == 0 else "Image ") + ROI
                cv2.imshow(title, candidate)
                break

    return predicted_digit

def sortNumbers(Cubes):
    """Return a new list with the cube entries ordered by cube number.

    The sort key of an entry is ``int(entry[2])``.  The list passed in is
    left untouched; entries with equal keys keep their relative order.
    """
    ordered = list(Cubes)
    ordered.sort(key=lambda entry: int(entry[2]))
    return ordered
        

#img = cv2.imread('gulRecNum.jpg')
img = cv2.imread('webcam7.png')
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)


# convert to HSV, since red and yellow are the lowest hue colors and come before green
hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)

# create a binary thresholded image on hue between red and yellow
#Change these if cube colours changes?
lower =(20, 100, 100)
upper = (30, 255, 255)
#lower = (0,240,160)
#upper = (30,255,255)
thresh = cv2.inRange(hsv, lower, upper)

# apply morphology: open first, then close the result of the opening.
# The close has to run on `clean`; running it on `thresh` again would
# silently throw the opening step away.
kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (9,9))
clean = cv2.morphologyEx(thresh, cv2.MORPH_OPEN, kernel)
kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (15,15))
clean = cv2.morphologyEx(clean, cv2.MORPH_CLOSE, kernel)

# get external contours
# (findContours returns 2 or 3 values depending on the OpenCV version)
contours = cv2.findContours(clean, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
contours = contours[0] if len(contours) == 2 else contours[1]

result2 = img.copy()

mask = np.zeros(result2.shape, dtype=np.uint8)
# Otsu-thresholded (inverted) grayscale image; the digit crops are cut from it
thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)[1]
ROI_number = 0

for c in contours:
    cv2.drawContours(result2,[c],0,(0,0,0),2)
    # get rotated rectangle from contour
    rot_rect = cv2.minAreaRect(c)
    box = cv2.boxPoints(rot_rect)
    # np.int0 was removed in NumPy 2.0; np.intp is the type it aliased
    box = box.astype(np.intp)
    # draw rotated rectangle on copy of img
    cv2.drawContours(result2,[box],0,(0,0,0),2)
    # Only process the component if its area is greater than 1.
    areal = cv2.contourArea(c)
    if areal > 1:
        # get the center of mass
        M = cv2.moments(c)
        cx = int(M['m10']/M['m00'])
        cy = int(M['m01']/M['m00'])
        center = (cx, cy)
        print("\nx: ",cx,"\ny: ",cy)
        color = (0, 0, 255)

        cv2.circle(result2, center, 3, color, -1)
        cv2.putText(result2, "center", (cx - 10, cy - 20),
                    cv2.FONT_HERSHEY_SIMPLEX, 1.2, color, 2)

        # Cut the bounding box of the contour out of the thresholded image
        # and save it; getNumber() reads the file back from disk.
        x,y,w,h = cv2.boundingRect(c)
        ROI = 255 - thresh[y:y+h, x:x+w]
        cv2.drawContours(mask, [c], -1, (255,255,255), -1)
        cv2.imwrite('ROI_{}.png'.format(ROI_number), ROI)

        #Read saved image (number)
        result = getNumber('ROI_{}.png'.format(ROI_number))
        print("ROI_number: ", result)
        # sortNumbers() converts the number with int(), so an entry that is
        # not a digit string would crash the sort; leave such cubes out.
        if result is not None and str(result).isdigit():
            Cubes.append([cx, cy, result])
        else:
            print("ROI_{}: no digit recognised, cube skipped".format(ROI_number))
        ROI_number += 1

# save result
cv2.imwrite("4cubes_result2.png",result2)

# display result
imS = cv2.resize(result2, (600, 400))
cv2.imshow("result2", imS)
#cv2.imshow('mask', mask)
#cv2.imshow('thresh', thresh)
SortedCubes = sortNumbers(Cubes)
print("\nFound array [x, y, Cube_num] = ", Cubes)
print("Sorted array [x, y, Cube_num] = ", SortedCubes)
cv2.waitKey(0)
cv2.destroyAllWindows()

I get the following error (it can't detect a number)

Traceback (most recent call last): File "c:/Users/Mads/OneDrive/Universitet/7. semester/ROB1/python/objectDetectiong.py", line 169, in <module> result = getNumber('ROI_{}.png'.format(ROI_number)) File "c:/Users/Mads/OneDrive/Universitet/7. semester/ROB1/python/objectDetectiong.py", line 70, in getNumber predicted_digit = result[0][1] IndexError: list index out of range

There are many ways to solve this. I see that the text is aligned with the white region. One way is to extract each white region, align it with the image axes in any direction, and rotate it by 90 degrees four times. Run OCR each time and keep the result with the highest confidence. — B200011011, Nov 24 '20 at 20:38
@B200011011 So how exactly would you do this? I've never tried this before — Me NoLonely, Nov 24 '20 at 20:52
@B200011011 I made an edit to my post. I can't get the code to read the new number on the yellow cube (from my actual webcam) — Me NoLonely, Nov 30 '20 at 14:13

score 1 · Accepted Answer · answered Nov 25 '20 at 20:38

This is an implementation of my comment. Since I do not have the individual images, this code works with the given grid-like processed image.

For OCR I used EasyOCR instead of Tesseract. You could also try pytesseract on each of the cropped output images. Instead of rotating four times by 90 degrees and picking the most confident result, I check whether the OCR result is a digit; only if a detection is not a number do I rotate and retry.

Tested on google colab. Replace cv2_imshow(...) with cv2.imshow(...) for working locally. Also remove from google.colab.patches import cv2_imshow import.

This is a modified version of my answer on card orientation correction, "OpenCV: using Canny and Shi-Tomasi to detect round corners of a playing card". All previous code is left in as comments.

Code

!pip install easyocr

import easyocr
# EasyOCR reader shared by every readtext() call in the loop further down
reader = easyocr.Reader(['ch_sim','en']) # need to run only once to load model into memory

"""
Based on my answer of rotated card detection,
https://stackoverflow.com/questions/64860785/opencv-using-canny-and-shi-tomasi-to-detect-round-corners-of-a-playing-card/64862448#64862448
"""


import cv2
import numpy as np
from google.colab.patches import cv2_imshow


# Load the input image
img = cv2.imread('1.jpg')


# Binarise the grayscale image: pixels above 127 become 255, the rest 0
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
ret,thresh = cv2.threshold(gray,127,255,0)

#cv2_imshow(thresh)
#cv2.imshow('Thresholded original',thresh)
#cv2.waitKey(0)



## Get contours
# findContours returns (image, contours, hierarchy) on OpenCV 3 and
# (contours, hierarchy) otherwise; taking the last two items works on both,
# whereas a plain two-name unpack raises ValueError on OpenCV 3.
contours,h = cv2.findContours(thresh,cv2.RETR_CCOMP, cv2.CHAIN_APPROX_SIMPLE)[-2:]


## only draw contour that have big areas
# (threshold used below: more than a tenth of the image area)
imx = img.shape[0]
imy = img.shape[1]
lp_area = (imx * imy) / 10



#################################################################
# Four point perspective transform
# https://www.pyimagesearch.com/2014/08/25/4-point-opencv-getperspective-transform-example/
#################################################################

def order_points(pts):
    """Order four 2-D points as top-left, top-right, bottom-right, bottom-left.

    The top-left corner is taken to be the point with the smallest x + y and
    the bottom-right the one with the largest x + y.  The top-right corner is
    the point with the smallest y - x and the bottom-left the one with the
    largest y - x.

    Returns a (4, 2) float32 array holding the points in that order.
    """
    coord_sum = pts.sum(axis=1)
    # np.diff along axis 1 gives y - x for every point
    coord_diff = np.diff(pts, axis=1)

    top_left = pts[np.argmin(coord_sum)]
    bottom_right = pts[np.argmax(coord_sum)]
    top_right = pts[np.argmin(coord_diff)]
    bottom_left = pts[np.argmax(coord_diff)]

    return np.array([top_left, top_right, bottom_right, bottom_left],
                    dtype="float32")


def four_point_transform(image, pts):
    """Warp the quadrilateral described by ``pts`` to an upright rectangle.

    The points are first put into top-left, top-right, bottom-right,
    bottom-left order with order_points().  The output width is the longer
    of the top and bottom edges and the output height the longer of the left
    and right edges, each truncated to an int.  Returns the warped image.
    """
    def edge_length(p, q):
        # Euclidean distance between two corner points
        return np.sqrt(((p[0] - q[0]) ** 2) + ((p[1] - q[1]) ** 2))

    corners = order_points(pts)
    tl, tr, br, bl = corners

    # size of the output: longest horizontal and longest vertical edge
    out_width = max(int(edge_length(br, bl)), int(edge_length(tr, tl)))
    out_height = max(int(edge_length(tr, br)), int(edge_length(tl, bl)))

    # destination corners in the same tl, tr, br, bl order ("birds eye view")
    target = np.array(
        [[0, 0],
         [out_width - 1, 0],
         [out_width - 1, out_height - 1],
         [0, out_height - 1]],
        dtype="float32")

    transform = cv2.getPerspectiveTransform(corners, target)
    return cv2.warpPerspective(image, transform, (out_width, out_height))


#################################################################
#print(len(contours))





# Copy of the input on which the detected rectangles and corners are drawn
tmp_img = img.copy()

for cnt in contours:
    # Only needed by the commented-out four-sided check below
    approx = cv2.approxPolyDP(cnt,0.01 * cv2.arcLength(cnt, True), True)
    ## calculate number of vertices
    #print(len(approx))


    ## Get the largest contours only
    ## Side count cannot be used since contours are not all rectangular
    if cv2.contourArea(cnt) > lp_area:
    #if len(approx) == 4 and cv2.contourArea(cnt) > lp_area:

        # print("\n\n")
        # print("#################################################")
        # print("rectangle")
        # print("#################################################")
        # print("\n\n")


        #tmp_img = img.copy()
        #cv2.drawContours(tmp_img, [cnt], 0, (0, 255, 0), 6)
        #cv2_imshow(tmp_img)
        #cv2.imshow('Contour Borders', tmp_img)
        #cv2.waitKey(0)


        # tmp_img = img.copy()
        # cv2.drawContours(tmp_img, [cnt], 0, (255, 0, 255), -1)
        # cv2_imshow(tmp_img)
        # #cv2.imshow('Contour Filled', tmp_img)
        # #cv2.waitKey(0)


        # # Make a hull arround the contour and draw it on the original image
        # tmp_img = img.copy()
        # mask = np.zeros((img.shape[:2]), np.uint8)
        # hull = cv2.convexHull(cnt)
        # cv2.drawContours(mask, [hull], 0, (255, 255, 255), -1)
        # cv2_imshow(mask)
        # #cv2.imshow('Convex Hull Mask', mask)
        # #cv2.waitKey(0)


        # # Draw minimum area rectangle
        # #tmp_img = img.copy()
        # rect = cv2.minAreaRect(cnt)
        # box = cv2.boxPoints(rect)
        # box = np.int0(box)
        # cv2.drawContours(tmp_img, [box], 0, (255, 0, 0), 2)
        # #cv2_imshow(tmp_img)
        # #cv2.imshow('Minimum Area Rectangle', tmp_img)
        # #cv2.waitKey(0)


        # Draw box corners and minimum area rectangle
        #tmp_img = img.copy()
        rect = cv2.minAreaRect(cnt)
        box = cv2.boxPoints(rect)
        # np.int0 was removed in NumPy 2.0; np.intp is the type it aliased
        box = box.astype(np.intp)
        #print(rect)
        #print(box)
        cv2.drawContours(tmp_img, [box], 0, (0, 50, 255), 3)
        for corner in box:
            # plain Python ints: some OpenCV builds reject NumPy integers
            # inside the centre tuple
            cv2.circle(tmp_img, (int(corner[0]), int(corner[1])), 8, (0, 255, 0), -1)
        #cv2_imshow(tmp_img)
        #cv2.imshow('Minimum Area Rectangle', tmp_img)
        #cv2.waitKey(0)



        ## Correct orientation and crop
        # Link, https://jdhao.github.io/2019/02/23/crop_rotated_rectangle_opencv/
        width = int(rect[1][0])
        height = int(rect[1][1])
        src_pts = box.astype("float32")
        dst_pts = np.array([[0, height-1],
                            [0, 0],
                            [width-1, 0],
                            [width-1, height-1]], dtype="float32")
        M = cv2.getPerspectiveTransform(src_pts, dst_pts)
        warped = cv2.warpPerspective(img, M, (width, height))
        #cv2_imshow(warped)



        # Run OCR on the cropped image as-is, then on its 90/180/270 degree
        # rotations, and stop at the first orientation that yields a digit.
        print("Detected Text:")
        candidate = warped
        for attempt in range(4):
            if attempt:
                # cv2.cv2 no longer exists in current opencv-python builds
                candidate = cv2.rotate(candidate, cv2.ROTATE_90_CLOCKWISE)
            result = reader.readtext(candidate)
            if not result:
                # Nothing detected in this orientation: indexing result[0]
                # would raise IndexError, so just try the next rotation.
                continue
            predicted_digit = result[0][1]
            #print(result)
            #print(predicted_digit)
            #cv2_imshow(candidate)
            if predicted_digit.isdigit():
                print(result)
                print(predicted_digit)
                cv2_imshow(candidate)
                break



        # # Draw bounding rectangle
        # #tmp_img = img.copy()
        # x, y, w, h = cv2.boundingRect(cnt)
        # cv2.rectangle(tmp_img, (x, y), (x + w, y + h), (255, 0, 0), 2)
        # #cv2_imshow(tmp_img)
        # #cv2.imshow('Bounding Rectangle', tmp_img)
        # #cv2.waitKey(0)


        # # Bounding Rectangle and Minimum Area Rectangle
        # #tmp_img = img.copy()
        # rect = cv2.minAreaRect(cnt)
        # box = cv2.boxPoints(rect)
        # box = np.int0(box)
        # cv2.drawContours(tmp_img, [box], 0, (0, 0, 255), 2)
        # x, y, w, h = cv2.boundingRect(cnt)
        # cv2.rectangle(tmp_img, (x, y), (x + w, y + h), (0, 255, 0), 2)
        # #cv2_imshow(tmp_img)
        # #cv2.imshow('Bounding Rectangle', tmp_img)
        # #cv2.waitKey(0)


        # # determine the most extreme points along the contour
        # # https://www.pyimagesearch.com/2016/04/11/finding-extreme-points-in-contours-with-opencv/
        # tmp_img = img.copy()
        # extLeft = tuple(cnt[cnt[:, :, 0].argmin()][0])
        # extRight = tuple(cnt[cnt[:, :, 0].argmax()][0])
        # extTop = tuple(cnt[cnt[:, :, 1].argmin()][0])
        # extBot = tuple(cnt[cnt[:, :, 1].argmax()][0])
        # cv2.drawContours(tmp_img, [cnt], -1, (0, 255, 255), 2)
        # cv2.circle(tmp_img, extLeft, 8, (0, 0, 255), -1)
        # cv2.circle(tmp_img, extRight, 8, (0, 255, 0), -1)
        # cv2.circle(tmp_img, extTop, 8, (255, 0, 0), -1)
        # cv2.circle(tmp_img, extBot, 8, (255, 255, 0), -1)


        # print("Corner Points: ", extLeft, extRight, extTop, extBot)

        # cv2_imshow(tmp_img)
        # #cv2.imshow('img contour drawn', tmp_img)
        # #cv2.waitKey(0)
        # #cv2.destroyAllWindows()



        # ## Perspective Transform
        # tmp_img = img.copy()
        # pts = np.array([extLeft, extRight, extTop, extBot])
        # warped = four_point_transform(tmp_img, pts)
        # cv2_imshow(tmp_img)
        # #cv2.imshow("Warped", warped)
        # #cv2.waitKey(0)


cv2_imshow(tmp_img)


#cv2.destroyAllWindows()

Output Prediction

Detected Text:
[([[85, 67], [131, 67], [131, 127], [85, 127]], '1', 0.9992043972015381)]
1

Detected Text:
[([[85, 65], [133, 65], [133, 125], [85, 125]], '2', 0.9991914629936218)]
2

Detected Text:
[([[96, 72], [144, 72], [144, 128], [96, 128]], '4', 0.9996564984321594)]
4

Detected Text:
[([[88, 76], [132, 76], [132, 132], [88, 132]], '3', 0.9973381161689758)]
3

White Region Detection With Corners

Alternate methods,

Try a pretrained digit classification model (trained on MNIST or similar datasets) on each large contour exceeding a certain area.
Use multitask object detection with rotation: one output of the network is the detections, another is an angle regression that predicts the orientation.
Use a text detector such as EAST and run OCR on each detected text region.

Hey again @B200011011, I've made an edit to my post, where I have another picture (the one I'm going to use). But it can't detect the cube with the number seven (7) — Me NoLonely, Nov 30 '20 at 14:12
Please post the new information as a new question, as my answer was specific to the given binary images. For the new image, `object detection` is another approach. Look into datasets like `SVHN`, http://ufldl.stanford.edu/housenumbers/. You can also try with your own dataset. An alternative approach is text region detection followed by applying OCR to that region. — B200011011, Nov 30 '20 at 15:43

pytesseract detects the wrong integer values

1 Answers1

Code