I have tried multiple programs to extract text from this image using OpenCV and pytesseract, but each one returns an empty string. Here's my code:
import cv2
import pytesseract
# Set the path to the pytesseract executable
pytesseract.pytesseract.tesseract_cmd = r'<path_to_tesseract_executable>'
# Load the image using OpenCV
image_path = '<path_to_image>'
image = cv2.imread(image_path)
# cv2.imread reports a missing or unreadable file by returning None instead
# of raising, so fail here with an actionable message rather than letting
# cvtColor abort with an opaque assertion. cv2.error is the same exception
# type cvtColor would have raised for the None input.
if image is None:
    raise cv2.error("Could not read image: {!r}".format(image_path))
# Convert the image to grayscale
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
# Binarize using Otsu's automatically selected threshold (the 0 passed as the
# threshold value is ignored when THRESH_OTSU is set). THRESH_BINARY_INV sends
# pixels above the threshold to 0 and the rest to 255, so darker text on a
# lighter background comes out as white text on a black background.
# NOTE(review): Tesseract is documented to work best with dark text on a light
# background; if the result is empty, try cv2.THRESH_BINARY here instead --
# confirm against the actual image.
_, threshold = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)
# Apply OCR using pytesseract
text = pytesseract.image_to_string(threshold)
# Print the extracted text
print(text)
I was able to extract some of the text, to a limited extent, by following the comment from @Kache, but not accurately, as I don't have much knowledge of OpenCV. Here's my updated code:
def solve_captcha(image_src, max_dot_area=100):
    """Remove small dark speckles from a captcha image and OCR the result.

    The image is thresholded so that dark pixels become foreground, the outer
    contours of the foreground are found, and every contour whose area is
    below ``max_dot_area`` has its bounding rectangle painted white on the
    colour image. The cleaned image is then converted to grayscale and passed
    to Tesseract.

    Args:
        image_src: Path of the image file, as accepted by ``cv2.imread``.
        max_dot_area: Contours with an area strictly below this value are
            treated as noise dots and erased. Tune it to the size of the
            speckles in your images; the default of 100 matches the
            previously hard-coded cutoff.

    Returns:
        The recognised text reduced to its uppercase letters and digits
        (characters for which ``str.isupper()`` or ``str.isdigit()`` is
        true); everything else, including whitespace, is dropped.

    Raises:
        cv2.error: If the image cannot be read from ``image_src``.
    """
    # Load the image using OpenCV
    image = cv2.imread(image_src)
    # cv2.imread returns None on failure instead of raising; surface that
    # with a clear message using the same exception type cvtColor would
    # otherwise raise for the None input.
    if image is None:
        raise cv2.error("Could not read image: {!r}".format(image_src))

    # Convert the image to grayscale
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    # Inverted fixed threshold: pixels at or below 128 become 255 (foreground),
    # brighter pixels become 0, so dark marks are what findContours picks up.
    _, threshold = cv2.threshold(gray, 128, 255, cv2.THRESH_BINARY_INV)

    # Find the outer contours of the dark regions
    contours, _ = cv2.findContours(threshold, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

    # Paint a white filled rectangle over the bounding box of each small region
    for contour in contours:
        if cv2.contourArea(contour) < max_dot_area:
            x, y, w, h = cv2.boundingRect(contour)
            cv2.rectangle(image, (x, y), (x + w, y + h), (255, 255, 255), thickness=cv2.FILLED)

    # Convert the modified image to grayscale
    gray_modified = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    # Apply OCR using pytesseract
    text = pytesseract.image_to_string(gray_modified)
    # Keep only uppercase letters and digits from the OCR output
    return "".join(ch for ch in text if ch.isupper() or ch.isdigit())