3

I need your help. I'm writing a Python script to recognize text inside a shape. The shape can be captured from an RTSP stream (IP camera) at any angle; see the attached file for an example. My code is below, but the coordinates used to crop the rotated shape are currently set manually.

import cv2
import numpy as np


def main():
    """Grab one frame from the RTSP camera, crop a rotated region and show it.

    The region is described by four hard-coded corner points. Its minimum-area
    rectangle is drawn on the frame, the frame is rotated so that the
    rectangle becomes axis-aligned, and the rectangle is cut out. The original
    frame and the rotated frame are shown at half size, the crop at full size.

    Raises:
        RuntimeError: if no frame could be read from the stream.
    """
    # NOTE(review): credentials are embedded in the URL; consider loading the
    # stream address from configuration or the environment instead.
    fn = cv2.VideoCapture("rtsp://admin:Admin123-@172.16.10.254")
    try:
        flag, img = fn.read()
    finally:
        # Release the stream as soon as the single frame has been fetched.
        fn.release()
    if not flag or img is None:
        raise RuntimeError("could not read a frame from the RTSP stream")

    # Corner points of the region of interest, in contour layout (N, 1, 2).
    cnt = np.array([
        [[64, 49]],
        [[122, 11]],
        [[391, 326]],
        [[308, 373]],
    ])
    print("shape of cnt: {}".format(cnt.shape))
    rect = cv2.minAreaRect(cnt)
    print("rect: {}".format(rect))

    box = cv2.boxPoints(rect)
    # np.int0 was removed in NumPy 2.0; cast explicitly to integer pixels.
    box = box.astype(np.int32)

    print("bounding box: {}".format(box))
    cv2.drawContours(img, [box], 0, (0, 255, 0), 2)

    img_crop, img_rot = crop_rect(img, rect)

    print("size of original img: {}".format(img.shape))
    print("size of rotated img: {}".format(img_rot.shape))
    print("size of cropped img: {}".format(img_crop.shape))

    # cv2.resize expects (width, height); shape is (height, width, ...).
    new_size = (img_rot.shape[1] // 2, img_rot.shape[0] // 2)
    img_rot_resized = cv2.resize(img_rot, new_size)
    new_size = (img.shape[1] // 2, img.shape[0] // 2)
    img_resized = cv2.resize(img, new_size)

    cv2.imshow("original contour", img_resized)
    cv2.imshow("rotated image", img_rot_resized)
    cv2.imshow("cropped_box", img_crop)

    # cv2.imwrite("crop_img1.jpg", img_crop)
    cv2.waitKey(0)
    cv2.destroyAllWindows()

def crop_rect(img, rect):
    """Rotate ``img`` about the centre of ``rect`` and cut the rectangle out.

    Args:
        img: image array; ``shape[0]`` is read as height, ``shape[1]`` as width.
        rect: ``(center, size, angle)`` triple, e.g. the value returned by
            ``cv2.minAreaRect``.

    Returns:
        A tuple ``(img_crop, img_rot)``: the patch of ``size`` pixels centred
        on ``center`` taken from the rotated image, and the rotated image
        itself (same width and height as ``img``).
    """
    # get the parameter of the small rectangle
    center = rect[0]
    size = rect[1]
    angle = rect[2]
    # Truncate centre and size to whole pixels before handing them to OpenCV.
    center, size = tuple(map(int, center)), tuple(map(int, size))

    # get row and col num in img
    height, width = img.shape[0], img.shape[1]
    print("width: {}, height: {}".format(width, height))

    # Rotate the whole image about the rectangle centre by the rectangle angle
    # (scale 1), keeping the original canvas size.
    M = cv2.getRotationMatrix2D(center, angle, 1)
    img_rot = cv2.warpAffine(img, M, (width, height))

    img_crop = cv2.getRectSubPix(img_rot, size, center)

    return img_crop, img_rot


# Run the demo only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()

example pic

hasanovmax
  • 31
  • 3
  • Use minAreaRect to get the rotation angle of the region and rotate it to horizontal before doing text OCR. See, for example, "Text skew correction with OpenCV and Python" on PyImageSearch (www.pyimagesearch.com/2017/02/20/text-skew-correction-opencv-python/). – fmw42 Apr 02 '20 at 16:40
  • Have you tried pytesseract, which is designed for this kind of job: https://pypi.org/project/pytesseract/ ? – George Ogden Nov 22 '21 at 21:26

1 Answers1

0

You may start with the example in the following post.
The code sample detects the license plate, and it also detects your "shape" with text.

After detecting the "shape" with the text, you may use the following stages:

  • Apply a threshold to the cropped area.
  • Find contours, and find the contour with maximum area.
  • Build a mask, and mask area outside the contour (like in the license plate example).
  • Use minAreaRect (as fmw42 commented), and get the angle of the rectangle.
  • Rotate the cropped area (by angle+90 degrees).
  • Apply OCR using pytesseract.image_to_string.

Here is the complete code:

import cv2
import numpy as np
import imutils
import pytesseract

# Detect a four-cornered "shape" in an image, crop and deskew it, then read
# its text with Tesseract. Intermediate images are displayed for inspection.

pytesseract.pytesseract.tesseract_cmd = r'C:\Program Files\Tesseract-OCR\tesseract.exe'  # I am using Windows

# Read the input image
img = cv2.imread('Admin123.jpg')
if img is None:
    # cv2.imread signals a missing/unreadable file by returning None.
    raise SystemExit("Could not read input image 'Admin123.jpg'")

# Reused code:
# https://stackoverflow.com/questions/60977964/pytesseract-not-recognizing-text-as-expected/60979089#60979089
################################################################################
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)  # convert to grey scale
gray = cv2.bilateralFilter(gray, 11, 17, 17)
edged = cv2.Canny(gray, 30, 200)  # perform edge detection

cnts = cv2.findContours(edged.copy(), cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
cnts = imutils.grab_contours(cnts)
# Keep only the ten largest contours as candidates.
cnts = sorted(cnts, key=cv2.contourArea, reverse=True)[:10]
screenCnt = None

# loop over our contours
for c in cnts:
    # approximate the contour
    peri = cv2.arcLength(c, True)
    approx = cv2.approxPolyDP(c, 0.018 * peri, True)
    # if our approximated contour has four points, then
    # we can assume that we have found our screen
    if len(approx) == 4:
        screenCnt = approx
        break

if screenCnt is None:
    # Without this guard drawContours below fails with an opaque error.
    raise SystemExit("No four-cornered contour found in the image")

# Masking the part other than the "shape" (drawContours fills mask in place)
mask = np.zeros(gray.shape, np.uint8)
cv2.drawContours(mask, [screenCnt], 0, 255, -1)
new_image = cv2.bitwise_and(img, img, mask=mask)

# Now crop: np.where yields (row indices, column indices) of the filled mask.
(rows, cols) = np.where(mask == 255)
(top, left) = (np.min(rows), np.min(cols))
(bottom, right) = (np.max(rows), np.max(cols))
cropped = gray[top:bottom + 1, left:right + 1]
################################################################################

# Apply a threshold to the cropped area
_, thresh = cv2.threshold(cropped, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)

# Find contours
cnts = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)
cnts = imutils.grab_contours(cnts)
if len(cnts) == 0:
    # max() on an empty sequence would raise a bare ValueError.
    raise SystemExit("No contours found in the thresholded crop")

# Get contour with maximum area
c = max(cnts, key=cv2.contourArea)

# Build a mask (same as the code above)
mask = np.zeros(cropped.shape, np.uint8)
cv2.drawContours(mask, [c], 0, 255, -1)
new_cropped = cv2.bitwise_and(cropped, cropped, mask=mask)

# Draw green contour for testing
test = cv2.cvtColor(new_cropped, cv2.COLOR_GRAY2BGR)
cv2.drawContours(test, [c], -1, (0, 255, 0), thickness=2)

# Use minAreaRect as fmw42 commented
rect = cv2.minAreaRect(c)
angle = rect[2]  # Get angle of the rectangle

# Rotate the cropped rectangle.
# NOTE(review): the +90 offset presumably depends on the angle convention of
# the installed OpenCV version's minAreaRect - confirm for your version.
rotated_cropped = imutils.rotate(new_cropped, angle + 90)

# Read the text in the "shape"
text = pytesseract.image_to_string(rotated_cropped, config='--psm 3')
print("Extracted text is:\n\n", text)

# Show images for testing:
cv2.imshow('cropped', cropped)
cv2.imshow('thresh', thresh)
cv2.imshow('test', test)
cv2.imshow('rotated_cropped', rotated_cropped)
cv2.waitKey(0)
cv2.destroyAllWindows()

OCR output result:

 AB12345
DEPARTMENT OF
INFORMATION

COMMUNICATION
TECHNOLOGY

cropped:
enter image description here

thresh:
enter image description here

test:
enter image description here

rotated_cropped:
enter image description here

Rotem
  • 30,366
  • 4
  • 32
  • 65
  • But how can I recognize the text when it is rotated by a negative angle, for example -90 degrees? – hasanovmax Apr 03 '20 at 10:12
  • I don't know if there is a way of telling whether the text is rotated, but it is simple to rotate the rectangular area by 90 degrees. – Rotem Apr 03 '20 at 13:19