I'm trying to create bounding boxes for the text in an image I have. An example is the one below.
I would like to add a bounding box around each This is a test
line. Unfortunately I'm not sure why this method is not automatically identifying the bounding boxes
import re
import cv2
import numpy as np
import pytesseract
from pytesseract import Output
from matplotlib import pyplot as plt
# Plot character boxes on image using pytesseract.image_to_boxes() function
image = cv2.imread('Image.jpg')
b, g, r = cv2.split(image)
image = cv2.merge([r,g,b])
d = pytesseract.image_to_data(image, output_type=Output.DICT)
print('DATA KEYS: \n', d.keys())
n_boxes = len(d['text'])
for i in range(n_boxes):
# condition to only pick boxes with a confidence > 60%
if int(d['conf'][i]) > 60:
(x, y, w, h) = (d['left'][i], d['top'][i], d['width'][i], d['height'][i])
image = cv2.rectangle(image, (x, y), (x + w, y + h), (0, 255, 0), 2)
b, g, r = cv2.split(image)
rgb_img = cv2.merge([r, g, b])
plt.figure(figsize=(16, 12))
plt.imshow(rgb_img)
plt.title('SAMPLE IMAGE WITH WORD LEVEL BOXES')
plt.show()