I'm having a problem: I binarize the image and then use the HoughLinesP function to get the vertical and horizontal lines, but it looks like my threshold parameter, or the fact that I do not filter the image before binarizing it, leaves some dots on the lines, so HoughLinesP cannot detect the lines exactly. Is there any way to fix this?
My code (references https://github.com/fazlurnu/Text-Extraction-Table-Image):
main.py
from preprocessing import get_grayscale, get_binary, invert_area, draw_text, detect, image_resize
from ROI_selection import detect_lines, get_ROI
import cv2 as cv
# Input table image; the path is resolved with cv.samples.findFile before reading.
filename = 'images/tmp2.png'
src = cv.imread(cv.samples.findFile(filename))
# Optional resize to a height of 1000 px (currently disabled):
# src = image_resize(src, height=1000)
# Binarize: convert to grayscale, then threshold with 210 as the threshold
# value and 255 as the maximum value.
gray_scale = get_grayscale(src)
bw = get_binary(gray_scale, threshold1=210, threshold2=255)
cv.imshow("bw", bw)
# bw has already been converted to grayscale, hence grayscale=False.
# With display=True, detect_lines shows the annotated image and waits for a
# key press before returning the (horizontal, vertical) line lists.
horizontal, vertical = detect_lines(bw, threshold=127, minLinLength=180, maxLineGap=6, display=True, write = False, grayscale = False)
preprocessing.py
import pytesseract
# Tell pytesseract which Tesseract executable to run (hard-coded Windows path).
pytesseract.pytesseract.tesseract_cmd = r'C:\Program Files (x86)\Tesseract-OCR\tesseract.exe'
import cv2 as cv
from ROI_selection import detect_lines, get_ROI
import numpy as np
def get_grayscale(image):
    """Return ``image`` converted from BGR to grayscale via ``cv.cvtColor``."""
    grayscale = cv.cvtColor(image, cv.COLOR_BGR2GRAY)
    return grayscale
def get_binary(image, threshold1 = 100, threshold2 = 255, threshType = cv.THRESH_BINARY):
    """Threshold ``image`` with ``cv.threshold`` and return the thresholded image.

    ``threshold1`` is passed as the threshold value, ``threshold2`` as the
    maximum value and ``threshType`` as the thresholding type. The first
    element of the pair returned by ``cv.threshold`` is discarded.
    """
    return cv.threshold(image, threshold1, threshold2, threshType)[1]
def invert_area(image, x, y, w, h, display=False):
    """Invert the pixel values of a rectangular region of ``image`` in place.

    Every value ``v`` inside the region is replaced by ``255 - v``.

    Args:
        image: Array-like image indexed as ``image[row, column]``; it is
            modified in place.
        x, y: Column and row of the region's top-left corner.
        w, h: Width and height of the region.
        display: When True, show the result in a window named "inverted"
            and wait for a key press.

    Returns:
        The same ``image`` object that was passed in.
    """
    # No scratch copy of the full image is needed: only the region is read.
    region = image[y:y + h, x:x + w]
    image[y:y + h, x:x + w] = 255 - region
    if display:
        cv.imshow("inverted", image)
        cv.waitKey(0)
        cv.destroyAllWindows()
    return image
def detect(cropped_frame, is_number = False):
    """Run ``pytesseract.image_to_string`` on ``cropped_frame`` and return its text.

    When ``is_number`` is true, the configuration string that carries a
    digit-only character whitelist is used; otherwise only the page
    segmentation mode is set.
    """
    if is_number:
        ocr_config = '-c tessedit_char_whitelist=0123456789 --psm 10 --oem 2'
    else:
        ocr_config = '--psm 10'
    return pytesseract.image_to_string(cropped_frame, config=ocr_config)
def draw_text(src, x, y, w, h, text):
    """Return a copy of ``src`` with a rectangle and a text label drawn on it.

    The rectangle spans ``(x, y)`` to ``(x + w, y + h)``. The label
    ``"text: " + text`` is always placed at the fixed position ``(50, 50)``.
    ``src`` itself is not modified.
    """
    annotated = np.copy(src)
    top_left = (x, y)
    bottom_right = (x + w, y + h)
    cv.rectangle(annotated, top_left, bottom_right, (255, 0, 0), 2)
    label = "text: " + text
    cv.putText(annotated, label, (50, 50), cv.FONT_HERSHEY_SIMPLEX,
               2, (0, 0, 0), 5, cv.LINE_AA)
    return annotated
def erode(img, kernel_size = 5):
    """Apply two iterations of ``cv.dilate`` with a square kernel of ones.

    NOTE(review): despite its name, this function calls ``cv.dilate`` rather
    than ``cv.erode`` -- confirm which operation is intended before renaming
    or changing it.
    """
    structuring_element = np.ones((kernel_size, kernel_size), np.uint8)
    return cv.dilate(img, structuring_element, iterations=2)
def image_resize(image, width = None, height = None, inter = cv.INTER_AREA):
    """Resize ``image`` to a target width or height, keeping the aspect ratio.

    If neither ``width`` nor ``height`` is given, the original image is
    returned unchanged. If ``width`` is given it determines the scale (any
    ``height`` is then ignored); otherwise ``height`` determines the scale.
    ``inter`` is forwarded to ``cv.resize`` as the interpolation flag.
    """
    src_height, src_width = image.shape[:2]

    # Nothing requested: hand back the input untouched.
    if width is None and height is None:
        return image

    if width is not None:
        # Scale by the requested width and derive the matching height.
        scale = width / float(src_width)
        target_size = (width, int(src_height * scale))
    else:
        # Scale by the requested height and derive the matching width.
        scale = height / float(src_height)
        target_size = (int(src_width * scale), height)

    return cv.resize(image, target_size, interpolation = inter)
ROI_selection.py
# -*- coding: utf-8 -*-
"""
Created on Tue Jun 16 15:48:11 2020
@author: My Laptop
"""
# -*- coding: utf-8 -*-
"""
Created on Tue Jun 16 14:19:57 2020
@author: My Laptop
"""
# -*- coding: utf-8 -*-
"""
Created on Tue Jun 16 13:48:51 2020
@author: My Laptop
"""
"""
@file hough_lines.py
@brief This program demonstrates line finding with the Hough transform
"""
import sys
import math
import cv2 as cv
import numpy as np
def is_vertical(line, tolerance=0):
    """Return whether the segment ``line`` is vertical.

    Args:
        line: Sequence whose elements 0 and 2 are compared (the x coordinates
            of the two end points).
        tolerance: Largest allowed absolute difference between the two x
            coordinates. The default of 0 requires them to be exactly equal.

    Returns:
        A truthy value when the x coordinates differ by at most ``tolerance``.
    """
    x_start, x_end = line[0], line[2]
    # max - min is never negative, so unsigned coordinates cannot wrap around.
    return max(x_start, x_end) - min(x_start, x_end) <= tolerance
def is_horizontal(line, tolerance=0):
    """Return whether the segment ``line`` is horizontal.

    Args:
        line: Sequence whose elements 1 and 3 are compared (the y coordinates
            of the two end points).
        tolerance: Largest allowed absolute difference between the two y
            coordinates. The default of 0 requires them to be exactly equal.

    Returns:
        A truthy value when the y coordinates differ by at most ``tolerance``.
    """
    y_start, y_end = line[1], line[3]
    # max - min is never negative, so unsigned coordinates cannot wrap around.
    return max(y_start, y_end) - min(y_start, y_end) <= tolerance
def overlapping_filter(lines, sorting_index):
    """Drop lines that lie within 5 units of their predecessor.

    ``lines`` is sorted by the coordinate at ``sorting_index``. The first
    line is always kept; each later line is kept only when its coordinate
    exceeds that of the line immediately before it in sorted order (whether
    or not that one was kept) by more than 5.

    Returns:
        A new list with the surviving lines in sorted order.
    """
    ordered = sorted(lines, key=lambda segment: segment[sorting_index])
    kept = []
    previous = None
    for current in ordered:
        is_first = previous is None
        if is_first or current[sorting_index] - previous[sorting_index] > 5:
            kept.append(current)
        previous = current
    return kept
def detect_lines(image, title='default', rho = 1, theta = np.pi/180, threshold = 50, minLinLength = 290, maxLineGap = 6, grayscale = True, display = False, write = False):
    """Detect horizontal and vertical line segments in ``image``.

    The image is run through Canny edge detection and the probabilistic
    Hough transform. Segments accepted by ``is_vertical`` / ``is_horizontal``
    are collected, and each group is thinned with ``overlapping_filter``.

    Args:
        image: Input image. Converted with ``cv.COLOR_BGR2GRAY`` when
            ``grayscale`` is True, otherwise passed to Canny as is.
        title: Base name of the PNG written when ``write`` is True.
        rho, theta, threshold, minLinLength, maxLineGap: Forwarded to
            ``cv.HoughLinesP``.
        grayscale: Whether ``image`` still needs the BGR-to-gray conversion.
        display: Draw the kept lines on a copy of the image, show it in a
            window named "Source" and wait for a key press.
        write: Save the copy to ``"../Images/<title>.png"``. The lines are
            drawn on it only when ``display`` is also True.

    Returns:
        ``(horizontal_lines, vertical_lines)``, or ``-1`` when ``image`` is
        None.
    """
    # Validate first: cv.cvtColor would raise on None before any later check.
    if image is None:
        print ('Error opening image!')
        return -1

    gray = cv.cvtColor(image, cv.COLOR_BGR2GRAY) if grayscale else image
    edges = cv.Canny(gray, 50, 150, None, 3)

    # Drawing canvas: promote a single-channel image to BGR so the coloured
    # lines keep their colours instead of collapsing to one channel value.
    if image.ndim == 2:
        cImage = cv.cvtColor(image, cv.COLOR_GRAY2BGR)
    else:
        cImage = np.copy(image)

    linesP = cv.HoughLinesP(edges, rho, theta, threshold, None, minLinLength, maxLineGap)

    horizontal_lines = []
    vertical_lines = []
    if linesP is not None:
        for detected in linesP:
            segment = detected[0]
            if is_vertical(segment):
                vertical_lines.append(segment)
            elif is_horizontal(segment):
                horizontal_lines.append(segment)
        horizontal_lines = overlapping_filter(horizontal_lines, 1)
        vertical_lines = overlapping_filter(vertical_lines, 0)

    if display:
        _draw_labelled_lines(cImage, horizontal_lines, (0, 255, 0), "h", 5, 0)
        _draw_labelled_lines(cImage, vertical_lines, (0, 0, 255), "v", 0, 5)
        cv.imshow("Source", cImage)
        cv.waitKey(0)
        cv.destroyAllWindows()

    if write:
        cv.imwrite("../Images/" + title + ".png", cImage)

    return (horizontal_lines, vertical_lines)


def _draw_labelled_lines(canvas, lines, colour, suffix, label_dx, label_dy):
    """Draw each segment on ``canvas`` and label it with its index plus ``suffix``."""
    for index, line in enumerate(lines):
        # Plain ints keep OpenCV's point parsing independent of NumPy scalar types.
        x1, y1, x2, y2 = (int(value) for value in line)
        cv.line(canvas, (x1, y1), (x2, y2), colour, 3, cv.LINE_AA)
        cv.putText(canvas, str(index) + suffix, (x1 + label_dx, y1 + label_dy),
                   cv.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 0), 1, cv.LINE_AA)
def get_cropped_image(image, x, y, w, h):
    """Return the region of ``image`` spanning rows ``y..y+h`` and columns ``x..x+w``.

    The result is whatever two-axis slicing of ``image`` yields.
    """
    rows = slice(y, y + h)
    columns = slice(x, x + w)
    return image[rows, columns]
def get_ROI(image, horizontal, vertical, left_line_index, right_line_index, top_line_index, bottom_line_index, offset=4):
    """Crop the cell of ``image`` bounded by four detected lines.

    The left/right bounds come from element 2 of the selected ``vertical``
    lines and the top/bottom bounds from element 3 of the selected
    ``horizontal`` lines. Each bound is moved inwards by ``offset``.

    Returns:
        ``(cropped_image, (x, y, w, h))`` where ``(x, y)`` is the top-left
        corner of the crop and ``w``, ``h`` are its width and height.
    """
    left = vertical[left_line_index][2] + offset
    top = horizontal[top_line_index][3] + offset
    right = vertical[right_line_index][2] - offset
    bottom = horizontal[bottom_line_index][3] - offset
    width, height = right - left, bottom - top
    roi = get_cropped_image(image, left, top, width, height)
    return roi, (left, top, width, height)
Here is input image:
Here is some output image:
Output images: the top one is the binary image, and the bottom one is the binary image with the lines from the HoughLinesP result drawn on it.