I am trying to use Pytesseract to read the digits from the following image:
Unfortunately, the program does not return any text, even after applying grayscale conversion, thresholding, noise removal, or Canny edge detection. When I add a config that whitelists only digits and $/, the program stops detecting anything, even in the high-resolution image. (here)
The code is as follows:
class NumberAnalyser:
    """Pre-process an image with OpenCV and read digits from it with Tesseract.

    Every helper takes an image array and returns a new array; ``numbers`` is
    the entry point that loads a file, thresholds it and runs OCR on it.
    The class relies on the module-level names ``cv2``, ``np`` and ``pt``
    (pytesseract).
    """

    # Options passed to Tesseract by ``numbers``. ``--psm 11`` selects the
    # sparse-text page segmentation mode and the whitelist limits the output
    # to digits, ``$`` and ``,``. The options are separated by spaces only:
    # a comma after the psm value would become part of that value.
    TESSERACT_CONFIG = "--psm 11 -c tessedit_char_whitelist=$,0123456789"

    def get_grayscale(self, image):
        """Return the single-channel grayscale version of a BGR ``image``."""
        return cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

    def remove_noise(self, image):
        """Return ``image`` smoothed with a 5x5 median blur."""
        return cv2.medianBlur(image, 5)

    def thresholding(self, image):
        """Return the grayscale of ``image`` with dark pixels zeroed out.

        Uses ``cv2.THRESH_TOZERO`` with a threshold of 127: pixels brighter
        than 127 keep their gray value, every other pixel becomes 0.
        """
        gray = self.get_grayscale(image)
        # Threshold the untouched grayscale image. Do not pass ``gray`` as the
        # destination of another OpenCV call beforehand: that would overwrite
        # it in place and change what is thresholded here.
        _, to_zero = cv2.threshold(gray, 127, 255, cv2.THRESH_TOZERO)
        return to_zero

    def dilate(self, image):
        """Return ``image`` dilated once with a 5x5 kernel of ones."""
        kernel = np.ones((5, 5), np.uint8)
        return cv2.dilate(image, kernel, iterations=1)

    def erode(self, image):
        """Return ``image`` eroded once with a 5x5 kernel of ones."""
        kernel = np.ones((5, 5), np.uint8)
        return cv2.erode(image, kernel, iterations=1)

    def opening(self, image):
        """Return the morphological opening (erosion, then dilation) of ``image``."""
        kernel = np.ones((5, 5), np.uint8)
        return cv2.morphologyEx(image, cv2.MORPH_OPEN, kernel)

    def canny(self, image):
        """Return the Canny edge map of ``image`` (thresholds 100 and 200)."""
        return cv2.Canny(image, 100, 200)

    def deskew(self, image):
        """Return ``image`` rotated about its centre to undo its estimated skew.

        The skew angle is taken from the minimum-area rectangle around all
        non-zero pixels. An image without any non-zero pixel is returned
        unchanged because no angle can be estimated from it.
        """
        coords = np.column_stack(np.where(image > 0))
        if coords.size == 0:
            return image
        angle = cv2.minAreaRect(coords)[-1]
        # NOTE(review): this normalisation assumes minAreaRect reports angles
        # in [-90, 0); newer OpenCV releases appear to use a different range,
        # so confirm against the installed version.
        if angle < -45:
            angle = -(90 + angle)
        else:
            angle = -angle
        (h, w) = image.shape[:2]
        center = (w // 2, h // 2)
        M = cv2.getRotationMatrix2D(center, angle, 1.0)
        return cv2.warpAffine(
            image,
            M,
            (w, h),
            flags=cv2.INTER_CUBIC,
            borderMode=cv2.BORDER_REPLICATE,
        )

    def match_template(self, image, template):
        """Return the ``TM_CCOEFF_NORMED`` match map of ``template`` over ``image``."""
        return cv2.matchTemplate(image, template, cv2.TM_CCOEFF_NORMED)

    def numbers(self, img_path):
        """Read the image at ``img_path``, threshold it and OCR it.

        The thresholded image is shown in a window titled ``threshold`` and
        the call waits for a key press before running OCR. The recognised
        text is printed and also returned.

        Raises:
            FileNotFoundError: if OpenCV could not load ``img_path``.
        """
        reader = cv2.imread(img_path)
        # cv2.imread signals failure by returning None rather than raising.
        if reader is None:
            raise FileNotFoundError("could not read image: %r" % (img_path,))
        thresh = self.thresholding(reader)
        cv2.imshow('threshold', thresh)
        cv2.waitKey(0)
        print('yes')
        text = pt.image_to_string(thresh, config=self.TESSERACT_CONFIG)
        print(text)
        return text
Adding or removing the --psm 11 option from the configuration does not change the result.
Any help would be super appreciated!