I want to detect text lines in a document image that contains some mathematical equations. The problem is that mathematical equations are much taller than ordinary letters. I have managed to detect them without any DL or ML model, but some problems still exist. I want to join bounding boxes that are very close to each other vertically (top-to-bottom distance) in OpenCV. I found these solutions on Stack Overflow: Solution 1, Solution 2, Solution 3, Solution 4, and some more examples. I then wrote my own script to solve this problem; it is shown below.
Main Issue
The problem is that the script splits each mathematical fraction into two bounding boxes. I want each fraction in one single bounding box, so that I can crop it and feed it to an OCR engine to extract the text. Problem Image
Can anyone suggest how I can join these very close bounding boxes into a single one? Basically, I want to set a threshold value: when the distance between two bounding boxes is less than that threshold, I want to merge them into a single box.
I'm new to OpenCV. I've searched Stack Overflow a lot and found many solutions to this, but none of them worked for me.
import cv2
import numpy as np
import imutils
from imutils import contours
from functools import cmp_to_key
def union(a, b):
    """Return the smallest rectangle that encloses both ``a`` and ``b``.

    Args:
        a: rectangle as an indexable ``[x, y, w, h]`` sequence.
        b: rectangle in the same form.

    Returns:
        The enclosing rectangle as a new list ``[x, y, w, h]``.
    """
    left = min(a[0], b[0])
    top = min(a[1], b[1])
    # Far edges are x + w and y + h; the union spans out to the larger of each.
    right = max(a[0] + a[2], b[0] + b[2])
    bottom = max(a[1] + a[3], b[1] + b[3])
    return [left, top, right - left, bottom - top]
def _intersect(a, b):
x = max(a[0], b[0])
y = max(a[1], b[1])
w = min(a[0] + a[2], b[0] + b[2]) - x
h = min(a[1] + a[3], b[1] + b[3]) - y
if h < 0: # in original code : if w<0 or h<0:
return False
return True
def _group_rectangles(rec):
    """
    Union intersecting rectangles.

    Each rectangle absorbs every later rectangle that ``_intersect`` reports
    as intersecting it. After each merge the scan of the later rectangles
    restarts, because the grown rectangle may now reach boxes it did not
    reach before.

    Args:
        rec - sequence of rectangles in form [x, y, w, h]; it is not modified
    Return:
        list of grouped rectangles, in order of the first member of each
        group; rectangles that were never merged are returned as the
        original objects
    """
    # Work on a copy so the caller's list is left untouched.
    boxes = list(rec)
    absorbed = [False] * len(boxes)
    final = []
    for i in range(len(boxes)):
        if absorbed[i]:
            continue
        j = i + 1
        while j < len(boxes):
            if not absorbed[j] and _intersect(boxes[i], boxes[j]):
                boxes[i] = union(boxes[i], boxes[j])
                absorbed[j] = True
                # boxes[i] grew: re-test all later boxes from the start.
                j = i + 1
            else:
                j += 1
        final.append(boxes[i])
    return final
def sort_contours(cnts, method="left-to-right"):
    """Sort contours by the position of their bounding boxes.

    Args:
        cnts: sequence of contours accepted by ``cv2.boundingRect``.
        method: one of ``"left-to-right"``, ``"right-to-left"``,
            ``"top-to-bottom"`` or ``"bottom-to-top"``. Any other value is
            treated like ``"left-to-right"``.

    Returns:
        A pair ``(cnts, boundingBoxes)`` of equally long tuples: the
        contours in sorted order and their bounding rectangles. Both tuples
        are empty when ``cnts`` is empty.
    """
    # zip(*...) over an empty list yields nothing to unpack, so handle the
    # "no contours found" case explicitly instead of raising ValueError.
    if len(cnts) == 0:
        return ((), ())

    # Descending order for the two "reversed" directions.
    reverse = method in ("right-to-left", "bottom-to-top")
    # Sort on the box's y-coordinate (index 1) for vertical methods,
    # otherwise on its x-coordinate (index 0).
    axis = 1 if method in ("top-to-bottom", "bottom-to-top") else 0

    boundingBoxes = [cv2.boundingRect(c) for c in cnts]
    ordered = sorted(
        zip(cnts, boundingBoxes),
        key=lambda pair: pair[1][axis],
        reverse=reverse,
    )
    (cnts, boundingBoxes) = zip(*ordered)
    return (cnts, boundingBoxes)
def detectChunks(fileName_, output_path="Test/Result.png"):
    """Detect text chunks in an image and save a copy with boxes drawn.

    The image is binarised, opened to drop small specks, and dilated so
    neighbouring glyphs fuse into blobs. The bounding boxes of the blobs'
    external contours are then merged with ``_group_rectangles`` and drawn
    in red on the image.

    Args:
        fileName_: path of the image to analyse.
        output_path: where the annotated image is written.

    Returns:
        The list of merged bounding boxes, each in ``x, y, w, h`` form
        (empty when no contour is found).

    Raises:
        IOError: if the input image cannot be read or the annotated image
            cannot be written.
    """
    image = cv2.imread(fileName_)
    if image is None:
        # cv2.imread reports failure by returning None rather than raising.
        raise IOError("Could not read image: {!r}".format(fileName_))

    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    blur = cv2.medianBlur(gray, 3)
    # Inverted Otsu threshold: ink becomes white (255) on a black background.
    thresh = cv2.threshold(blur, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)[1]
    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (3, 2))
    opening = cv2.morphologyEx(thresh, cv2.MORPH_OPEN, kernel, iterations=1)
    # The kernel is wider than tall, so dilation spreads more horizontally
    # than vertically.
    dilate_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (10, 5))
    dilate = cv2.dilate(opening, dilate_kernel, iterations=4)

    cnts = cv2.findContours(dilate, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    cnts = imutils.grab_contours(cnts)

    boundingBoxes = []
    if len(cnts) > 0:
        # Sort by x, then by y. NOTE(review): presumably the imutils sort is
        # stable, so the x order survives as the tie-break -- confirm.
        (cnts, boundingBoxes) = imutils.contours.sort_contours(cnts, method="left-to-right")
        (cnts, boundingBoxes) = imutils.contours.sort_contours(cnts, method="top-to-bottom")
        boundingBoxes = _group_rectangles(list(boundingBoxes))

    for x, y, w, h in boundingBoxes:
        cv2.rectangle(image, (x, y), (x + w, y + h), (0, 0, 255), 1)

    if not cv2.imwrite(output_path, image):
        raise IOError("Could not write image: {!r}".format(output_path))
    return boundingBoxes
if __name__ == "__main__":
    # Run the detection on the sample page only when this file is executed
    # as a script, so importing it does not trigger image processing.
    detectChunks(r"Test\2021_A_10.jpg")