I have a following question. I would like to read these types of captcha in python:
The best code I have done is this, however it is not able to solve all these captchas:
import pytesseract
import cv2
import numpy as np
import re
def odstran_sum(img,threshold):
"""Funkce odstrani sum."""
filtered_img = np.zeros_like(img)
labels,stats= cv2.connectedComponentsWithStats(img.astype(np.uint8),connectivity=8)[1:3]
label_areas = stats[1:, cv2.CC_STAT_AREA]
for i,label_area in enumerate(label_areas):
if label_area > threshold:
filtered_img[labels==i+1] = 1
return filtered_img
def preprocess(img_path):
"""Konvertuje do binary obrazku."""
img = cv2.imread(img_path,0)
blur = cv2.GaussianBlur(img, (3,3), 0)
thresh = cv2.threshold(blur, 150, 255, cv2.THRESH_BINARY_INV)[1]
filtered_img = 255-odstran_sum(thresh,20)*255
kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (3,3))
erosion = cv2.erode(filtered_img,kernel,iterations = 1)
return erosion
def captcha_to_string(obrazek):
"""Funkce vrati text z captchy"""
text = pytesseract.image_to_string(obrazek)
return re.sub(r'[^\x00-\x7F]+',' ', text).strip()
img = preprocess(CAPTCHA_NAME)
text = captcha_to_string(img)
print(text)
Is it possible to improve my code that it will be able to read all these five examples? Thanks a lot.