Segmenting image files with text (and pictures) into blocks

Question

I'm trying to create bounding boxes for the text in an image I have. An example is the one below.

I would like to add a bounding box around each "This is a test" line. Unfortunately, I'm not sure why this method is not automatically identifying the bounding boxes.

import re
import cv2
import numpy as np
import pytesseract
from pytesseract import Output
from matplotlib import pyplot as plt


# Plot word-level boxes on the image using pytesseract.image_to_data().
image = cv2.imread('Image.jpg')
if image is None:
    # cv2.imread() does not raise on a missing/unreadable file; it returns None.
    raise FileNotFoundError("Could not read image file 'Image.jpg'")

# OpenCV loads images in BGR order. Convert to RGB exactly once: that is the
# channel order used for OCR here and the one matplotlib displays.
image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
d = pytesseract.image_to_data(image, output_type=Output.DICT)
print('DATA KEYS: \n', d.keys())

# The dict holds parallel lists with one entry per detected element.
boxes = zip(d['conf'], d['left'], d['top'], d['width'], d['height'])
for conf, x, y, w, h in boxes:
    # Only keep boxes with a confidence > 60%. float() is used so that
    # fractional confidence values are accepted as well as integers.
    if float(conf) > 60:
        cv2.rectangle(image, (x, y), (x + w, y + h), (0, 255, 0), 2)

# The image is already RGB at this point; swapping the channels a second time
# would hand matplotlib a BGR image and show red and blue reversed.
rgb_img = image
plt.figure(figsize=(16, 12))
plt.imshow(rgb_img)
plt.title('SAMPLE IMAGE WITH WORD LEVEL BOXES')
plt.show()

Do you have a specific issue? There must be a million factors which could be causing this, no? — AMC, Apr 14 '20 at 01:46

score 2 · Accepted Answer · answered Apr 14 '20 at 02:00

Here is a different way to do that with Python/OpenCV.

Read the input
Convert to gray
(OTSU) Threshold (white text on black background)
Apply a morphological dilate with a horizontal kernel longer than the letter spacing, then a morphological open with a smaller vertical kernel to remove the thin horizontal lines remaining from the line in the page.
Find contours
Draw bounding boxes of contours on input
Save result

Input:

import cv2
import numpy as np

# load image
img = cv2.imread("test_text.jpg")
if img is None:
    # cv2.imread() does not raise on a missing/unreadable file; it returns None.
    raise FileNotFoundError('Could not read image file "test_text.jpg"')

# convert to gray
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

# Otsu threshold, inverted so the text becomes white on a black background
thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)[1]

# dilate with a wide, short kernel (250 wide x 3 tall) so the characters of
# one text line merge into a single horizontal blob
kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (250, 3))
morph = cv2.morphologyEx(thresh, cv2.MORPH_DILATE, kernel)

# open with a narrow, tall kernel (3 wide x 17 tall) to remove blobs thinner
# than the kernel, i.e. the thin horizontal lines left over after dilation
kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (3, 17))
morph = cv2.morphologyEx(morph, cv2.MORPH_OPEN, kernel)

# find contours
cntrs = cv2.findContours(morph, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
# findContours returns 2 or 3 values depending on the OpenCV version; pick
# the element that holds the contour list in either case
cntrs = cntrs[0] if len(cntrs) == 2 else cntrs[1]

# draw the bounding box of each contour on a copy of the input
result = img.copy()
for c in cntrs:
    x, y, w, h = cv2.boundingRect(c)
    cv2.rectangle(result, (x, y), (x + w, y + h), (0, 0, 255), 2)

# write intermediate images and the result to disk
cv2.imwrite("test_text_threshold.png", thresh)
cv2.imwrite("test_text_morph.png", morph)
cv2.imwrite("test_text_lines.jpg", result)

# show every stage and wait for a key press before closing the windows
cv2.imshow("GRAY", gray)
cv2.imshow("THRESH", thresh)
cv2.imshow("MORPH", morph)
cv2.imshow("RESULT", result)
cv2.waitKey(0)
cv2.destroyAllWindows()

Thresholded image:

Dilated image:

Result:

This is exactly what I was attempting to do. Do you have a resource that expands more into this segmenting images into different parts? — Black, Apr 15 '20 at 00:25
No, not anything more specific. I do not know your use case. But read the OpenCV documentation at https://docs.opencv.org/4.1.1/ and search Google. You will find many examples. — fmw42, Apr 15 '20 at 00:34

Segmenting image files with text (and pictures) into blocks

1 Answer

Linked