Transparent Captcha Image with Horizontal Line

Question

def resolve(img_path):
    """Read the captcha text from the image stored at ``img_path``.

    The image is flattened onto a white background, written back to
    ``img_path`` as JPEG data, preprocessed with ``enhance`` and the
    preprocessed result is handed to Tesseract.

    Note that the file at ``img_path`` is overwritten in the process.

    Args:
        img_path: Path of the (possibly transparent) captcha image.

    Returns:
        The string returned by ``pytesseract.image_to_string``.
    """
    # Convert to RGBA first so the image can act as its own paste mask even
    # when the file is not already stored with an alpha channel.
    image = Image.open(img_path).convert("RGBA")
    new_image = Image.new("RGBA", image.size, "WHITE")  # Create a white rgba background
    new_image.paste(image, (0, 0), image)  # Paste the image on the background.
    new_image.convert('RGB').save(img_path, "JPEG")  # Save as JPEG
    enhanced_image = enhance(img_path)
    # Run OCR on the enhanced image rather than on the file path; otherwise
    # the preprocessing done by enhance() has no effect on the result.
    return pytesseract.image_to_string(enhanced_image)

def enhance(img_path):
    """Load the image at ``img_path`` and return a thresholded, eroded copy.

    The image is read with OpenCV, converted to grayscale, binarized with an
    inverse binary threshold (threshold 180, max value 255) and eroded once
    with a (1, 2) elliptical structuring element.

    Args:
        img_path: Path of the image file to preprocess.

    Returns:
        The eroded single-channel image produced by ``cv2.erode``.
    """
    grayscale = cv2.cvtColor(cv2.imread(img_path), cv2.COLOR_BGR2GRAY)
    # Only the thresholded image is needed; the returned threshold value
    # is discarded.
    _, binary_inv = cv2.threshold(grayscale, 180, 255, cv2.THRESH_BINARY_INV)
    structuring_element = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (1, 2))
    return cv2.erode(binary_inv, structuring_element, iterations=1)

I'm trying to solve the captchas in the images above. I tried converting the transparent background to white and then enhancing the image, but the results are not correct.

score 3 · Answer 1 · answered Jan 20 '21 at 18:24

Let me start with the potential problem with your code

def enhance(img_path):
    # Excerpt from the question's enhance(): the file is loaded with
    # cv2.imread, passing only the path (no explicit read flag).
    image1 = cv2.imread(img_path)

Now, if you read it with imread the result will be:

You can't read it with pytesseract from the output image.

This is a known issue stated in this answer: cv2 imread transparency gone

As mentioned in the answer:

put a white image behind the transparent one and with that you solve the problem.

We will apply the same technique and now result will be:

As for the second image result will be:

We will be doing the following steps for efficiently reading from the output image:

1. Resize the image
2. Apply adaptive-threshold

For the first image the result will be:

For the second image the result will be:

Now, when you read the images with pytesseract using page segmentation mode 6 (`--psm 6`; see modes), the result will be:

3daab
b42yb

Code:

import cv2
from PIL import Image
from pytesseract import image_to_string


def save_transparent_image(image_path, save_name):
    """Flatten a transparent image onto opaque white and save it as PNG.

    Args:
        image_path: Path of the source image; it is converted to RGBA.
        save_name: Path the flattened PNG is written to.
    """
    source = Image.open(image_path).convert("RGBA")
    opaque_white = (255, 255, 255, 255)
    background = Image.new(mode='RGBA', size=source.size, color=opaque_white)
    # The source image is also used as the paste mask, so its alpha channel
    # decides where the white background stays visible.
    background.paste(source, mask=source)
    background.save(save_name, format="PNG")


img_lst = ["o3upS.png", "kGpYk.png"]

for i, img_name in enumerate(img_lst):
    # Write a copy of the captcha with the transparency replaced by white,
    # so that cv2.imread sees a usable image.
    save_image = f"captcha{i}.png"
    save_transparent_image(img_name, save_image)

    # Step 1: Resize the image (double the width and the height)
    img = cv2.imread(save_image)
    height, width = img.shape[:2]
    img = cv2.resize(img, (2 * width, 2 * height))

    # Step 2: Apply adaptive-threshold (block size 33, constant C = 79)
    gry = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    thr = cv2.adaptiveThreshold(
        gry, 255, cv2.ADAPTIVE_THRESH_MEAN_C, cv2.THRESH_BINARY, 33, 79
    )

    # Step 3: Read the threshold image and keep only the first output line
    txt = image_to_string(thr, config="--psm 6")
    txt = txt.partition("\n")[0]
    print(txt)

Question: Will this code work for other captchas?

No, it won't, unless the captchas are similar to the given examples. For other captchas you will need to change the adaptive-threshold's block-size and C parameters and check whether it works.

Transparent Captcha Image with Horizontal Line

1 Answers1

Linked