4

I'm using pytesseract (0.3.2) with openCV (4.1.2) to identify digits in images. While image_to_string is working, image_to_data and image_to_boxes are not. I need to be able to draw the bounding boxes on the images and this has stumped me. I've tried different images, older versions of pytesseract, etc. I'm using Windows and Jupyter Notebooks.

import cv2 
import pytesseract

#erosion
def erode(image):
    kernel = np.ones((5,5),np.uint8)
    return cv2.erode(image, kernel, iterations = 1)

#grayscale
def get_grayscale(image):
    return cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

#thresholding
def thresholding(image):
    #return cv2.adaptiveThreshold(image, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY, 31, 2)
    return cv2.threshold(image, 200, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)[1]

img = cv2.imread('my_image.jpg')
pytesseract.pytesseract.tesseract_cmd = r'C:\mypath\tesseract.exe'

gray = get_grayscale(img)
thresh = thresholding(gray)
erode = remove_noise(thresh)

custom_config = r'-c tessedit_char_whitelist=0123456789 --psm 6'
print(pytesseract.image_to_string(erode, config=custom_config))

cv2.imwrite("test.jpg", erode)

#these return nothing
print(pytesseract.image_to_boxes(Image.open('test.jpg')))
print(pytesseract.image_to_data(Image.open('test.jpg')))
revod
  • 41
  • 1
  • 3

2 Answers2

1

Instead of using image_to_boxes, an alternative approach is to simply find contours with cv2.findContours, obtain the bounding rectangle coordinates with cv2.boundingRect, and draw the bounding box with cv2.rectangle

Using this sample input image

enter image description here

Drawn boxes

enter image description here

Result from OCR

1234567890

Code

import cv2
import pytesseract
import numpy as np

pytesseract.pytesseract.tesseract_cmd = r"C:\Program Files\Tesseract-OCR\tesseract.exe"

# Load image, grayscale, Otsu's threshold
image = cv2.imread('1.png')
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)[1]

# Draw bounding boxes
cnts = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
cnts = cnts[0] if len(cnts) == 2 else cnts[1]
for c in cnts:
    x,y,w,h = cv2.boundingRect(c)
    cv2.rectangle(image, (x, y), (x + w, y + h), (36,255,12), 2)

# OCR
data = pytesseract.image_to_string(255 - thresh, lang='eng',config='--psm 6')
print(data)

cv2.imshow('thresh', thresh)
cv2.imshow('image', image)
cv2.waitKey()
nathancy
  • 42,661
  • 14
  • 115
  • 137
  • If funny how 10 years later the same questions are still popping up, over and over again. Anyway, you're doing a pretty good job helping people! – karlphillip Feb 01 '20 at 12:45
  • 1
    I don't see a way to print out predicted characters with their corresponding boxes using this method. – revod Feb 03 '20 at 15:37
  • I don't understand, isn't the bounding boxes what you want? – nathancy Feb 03 '20 at 21:05
1

Please try the following code:

from pytesseract import Output
import pytesseract
import cv2
 
image = cv2.imread("my_image.jpg")

#swap color channel ordering from BGR (OpenCV’s default) to RGB (compatible with Tesseract and pytesseract).
# By default OpenCV stores images in BGR format and since pytesseract assumes RGB format,
# we need to convert from BGR to RGB format/mode:
rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
 
pytesseract.pytesseract.tesseract_cmd = r'C:\mypath\tesseract.exe'
custom_config = r'-c tessedit_char_whitelist=0123456789 --psm 6'
results = pytesseract.image_to_data(rgb, output_type=Output.DICT,lang='eng',config=custom_config)
boxresults = pytesseract.image_to_boxes(rgb,output_type=Output.DICT,lang='eng',config=custom_config)
print(results)
print(boxresults)

for i in range(0, len(results["text"])):
    # extract the bounding box coordinates of the text region from the current result
    tmp_tl_x = results["left"][i]
    tmp_tl_y = results["top"][i]
    tmp_br_x = tmp_tl_x + results["width"][i]
    tmp_br_y = tmp_tl_y + results["height"][i] 
    tmp_level = results["level"][i]
    conf = results["conf"][i]
    text = results["text"][i]
    
    if(tmp_level == 5):
        cv2.putText(image, text, (tmp_tl_x, tmp_tl_y - 10), cv2.FONT_HERSHEY_SIMPLEX,0.5, (0, 0, 255), 1)
        cv2.rectangle(image, (tmp_tl_x, tmp_tl_y), (tmp_br_x, tmp_br_y), (0, 0, 255), 1)
        
for j in range(0,len(boxresults["left"])):
    left = boxresults["left"][j]
    bottom = boxresults["bottom"][j]
    right = boxresults["right"][j]
    top = boxresults["top"][j] 
    cv2.rectangle(image, (left, top), (right, bottom), (255, 0, 0), 1)
       
    
cv2.imshow("image",image)
cv2.waitKey(0)
livezingy
  • 161
  • 1
  • 5