How to merge nearby bounding boxes in OpenCV from top to bottom

Question

I want to detect text lines in a document image that contains some mathematical equations. The problem is that mathematical equations are much taller than ordinary letters. I have successfully detected them without any DL or ML model, but some problems still exist. I want to join bounding boxes that are too close to each other in the top-to-bottom direction in OpenCV. I found these solutions on Stack Overflow: Solution 1, Solution 2, Solution 3, Solution 4, and some more examples. I wrote my own script to solve this problem; it is shown below.

Input Image

Result Image

Main Issue

The problem is that it splits a mathematical fraction into two bounding boxes. I want each fraction to be covered by one single bounding box, so that I can crop it and feed it to an OCR engine to extract the text. Problem Image

Can anyone suggest how I can join these very close bounding boxes into a single one? Basically, I want to set a threshold value: when the distance between two bounding boxes is less than that threshold, I want to merge them into a single box.

I'm new to OpenCV. I've searched a lot on Stack Overflow and found many solutions to this, but none of them worked for me.

import cv2
import numpy as np
import imutils
from imutils import contours
from functools import cmp_to_key

def union(a, b):
    """Return the smallest [x, y, w, h] box that encloses both a and b.

    Args:
        a, b - rectangles in form [x, y, w, h]
    Return:
        new list [x, y, w, h]; neither input is modified
    """
    left = min(a[0], b[0])
    top = min(a[1], b[1])
    # Far edges are position + size; the enclosing box reaches the farthest one.
    right = max(a[0] + a[2], b[0] + b[2])
    bottom = max(a[1] + a[3], b[1] + b[3])
    return [left, top, right - left, bottom - top]
def _intersect(a, b):
    """Report whether two [x, y, w, h] boxes overlap along the y axis.

    Only the vertical extent is compared; horizontal positions are ignored,
    so boxes in the same horizontal band count as intersecting no matter how
    far apart they are in x. Boxes whose edges merely touch (zero overlap)
    also count as intersecting.
    """
    upper = max(a[1], b[1])
    lower = min(a[1] + a[3], b[1] + b[3])
    # in original code the horizontal overlap (w < 0) was tested as well
    return not (lower - upper < 0)

def _group_rectangles(rec, max_gap=0):
    """
    Union intersecting rectangles.

    Rectangles are merged transitively: whenever a rectangle absorbs another
    one, the scan restarts so that rectangles which only intersect the grown
    box are absorbed as well. Intersection is decided by `_intersect` and
    merging by `union`.

    Args:
        rec - list of rectangles in form [x, y, w, h]; it is not modified
        max_gap - extra vertical distance (same unit as the rectangles,
            expected to be >= 0) that two rectangles may be apart and still
            be merged. With the default 0 only rectangles that `_intersect`
            already accepts are merged.
    Return:
        list of grouped rectangles
    """
    # Work on a shallow copy so the caller's list is not overwritten.
    rec = list(rec)
    tested = [False] * len(rec)
    final = []
    for i in range(len(rec)):
        if tested[i]:
            # Already absorbed into an earlier rectangle.
            continue
        j = i + 1
        while j < len(rec):
            if not tested[j]:
                x, y, w, h = rec[i]
                # Grow the current box by max_gap above and below for the
                # test only; the merged result is built from the real boxes.
                reach = [x, y - max_gap, w, h + 2 * max_gap]
                if _intersect(reach, rec[j]):
                    rec[i] = union(rec[i], rec[j])
                    tested[j] = True
                    # rec[i] grew: rescan from i + 1 for new neighbours.
                    j = i
            j += 1
        final.append(rec[i])
    return final

def sort_contours(cnts, method="left-to-right"):
    """Sort contours by the position of their bounding rectangles.

    Args:
        cnts - sequence of contours accepted by cv2.boundingRect
        method - "left-to-right", "right-to-left", "top-to-bottom" or
            "bottom-to-top"; any other value behaves like "left-to-right"
    Return:
        tuple (cnts, boundingBoxes) of two equally long tuples in sorted
        order; two empty tuples when cnts is empty
    """
    # Descending order for the two "reverse" directions.
    reverse = method in ("right-to-left", "bottom-to-top")
    # Sort on element 1 (y) of the bounding box for vertical methods,
    # otherwise on element 0 (x).
    axis = 1 if method in ("top-to-bottom", "bottom-to-top") else 0

    boundingBoxes = [cv2.boundingRect(c) for c in cnts]
    if not boundingBoxes:
        # zip(*[]) yields nothing, so the unpacking below would raise
        # ValueError; return an empty result instead.
        return ((), ())

    paired = sorted(
        zip(cnts, boundingBoxes),
        key=lambda pair: pair[1][axis],
        reverse=reverse,
    )
    (cnts, boundingBoxes) = zip(*paired)
    # return the list of sorted contours and bounding boxes
    return (cnts, boundingBoxes)

def detectChunks(fileName_):
    """Detect text-line regions in an image and save an annotated copy.

    The image is converted to grayscale, median-blurred, binarised with an
    inverted Otsu threshold, opened with a 3x2 kernel and dilated with a
    10x5 kernel (4 iterations). The external contours of the result are
    sorted, their bounding boxes are merged with `_group_rectangles`, and
    each merged box is drawn on the image, which is written to
    "Test/Result.png".

    Args:
        fileName_ - path of the image to process
    Return:
        list of merged bounding boxes as (x, y, w, h) sequences; empty when
        no contours were found
    Raises:
        OSError - if the input image cannot be read or the result image
            cannot be written
    """
    image = cv2.imread(fileName_)
    if image is None:
        # cv2.imread reports failure by returning None instead of raising.
        raise OSError("Could not read image: {}".format(fileName_))

    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    blur = cv2.medianBlur(gray, 3)
    thresh = cv2.threshold(blur, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)[1]
    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (3, 2))
    opening = cv2.morphologyEx(thresh, cv2.MORPH_OPEN, kernel, iterations=1)
    dilate_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (10, 5))
    dilate = cv2.dilate(opening, dilate_kernel, iterations=4)
    cnts = cv2.findContours(dilate, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    cnts = imutils.grab_contours(cnts)

    boundingBoxes = []
    if len(cnts) > 0:
        # Sorting by x first and then by y gives top-to-bottom order with
        # ties kept left-to-right, provided the sort is stable.
        (cnts, boundingBoxes) = imutils.contours.sort_contours(cnts, method="left-to-right")
        (cnts, boundingBoxes) = imutils.contours.sort_contours(cnts, method="top-to-bottom")
        boundingBoxes = _group_rectangles(list(boundingBoxes))

    for x, y, w, h in boundingBoxes:
        # (0, 0, 255) is red in OpenCV's BGR channel order; 1 px outline.
        cv2.rectangle(image, (x, y), (x + w, y + h), (0, 0, 255), 1)

    # cv2.imwrite returns False on failure (e.g. missing "Test" directory).
    if not cv2.imwrite("Test/Result.png", image):
        raise OSError("Could not write result image: Test/Result.png")

    return boundingBoxes
if __name__ == "__main__":
    # Run only when executed as a script, not when imported as a module.
    # A forward slash is used because it works on Windows and POSIX alike,
    # and matches the "Test/Result.png" output path used by detectChunks.
    detectChunks("Test/2021_A_10.jpg")

How to merge nearby bounding boxes in OpenCV from top to bottom

0 Answers