I found a snippet in my Gists archive that might work for you. Unfortunately it's not my code and I don't have the original source for it, but in short the idea is to use OpenCV (cv2) morphological transformations: first remove the horizontal lines using a horizontal kernel, and then fill in the missing pixels with a smaller vertical kernel.
You can try playing around with different kernels and parameters to see if you can get it to work better for your specific case. I would suggest starting with just removing the horizontal lines and checking how pytesseract handles the result without any additional postprocessing. Notice that when we fill in the missing pixels, some of the other text becomes less readable.
import cv2
import matplotlib.pyplot as plt

# Script: erase long horizontal lines from an image with morphological
# operations, patch the gaps they leave, and display each stage.

IMAGE_PATH = './dumpster/dVbYd.jpg'

image = cv2.imread(IMAGE_PATH)
if image is None:
    # cv2.imread signals a missing/unreadable file by returning None instead
    # of raising; fail here with a clear message rather than inside cvtColor.
    raise FileNotFoundError(f"Could not read image: {IMAGE_PATH}")

gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
# Inverted binary threshold with an Otsu-selected level: pixels darker than
# the threshold become white (255) foreground for the morphology below.
# NOTE(review): presumably the input is dark ink on a light page - confirm.
thresh = cv2.threshold(
    gray, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU
)[1]

# Remove lines using horizontal kernel: opening with a 25x1 rectangle keeps
# only foreground runs that are wide enough to contain the kernel.
horizontal_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (25, 1))
detected_lines = cv2.morphologyEx(
    thresh, cv2.MORPH_OPEN, horizontal_kernel, iterations=2
)
cnts = cv2.findContours(
    detected_lines, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE
)
# findContours returns either 2 or 3 values depending on the OpenCV version;
# pick whichever element holds the contour list.
cnts = cnts[0] if len(cnts) == 2 else cnts[1]
for c in cnts:
    # Paint each detected line white, in place, on the colour image.
    cv2.drawContours(image, [c], -1, (255, 255, 255), 2)

# Repair image using smaller vertical kernel: closing is applied to the
# inverted image, then the result is inverted back.
repair_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (3, 6))
result = 255 - cv2.morphologyEx(
    255 - image, cv2.MORPH_CLOSE, repair_kernel, iterations=1
)

# Show the grayscale input, the image with lines removed, and the repaired
# image. OpenCV stores colour images as BGR while matplotlib expects RGB, so
# the colour images are converted before being displayed.
stages = (
    (gray, 'gray'),
    (cv2.cvtColor(image, cv2.COLOR_BGR2RGB), None),
    (cv2.cvtColor(result, cv2.COLOR_BGR2RGB), None),
)
for picture, cmap in stages:
    plt.figure(figsize=(15, 10))
    plt.imshow(picture, cmap=cmap)
    plt.show()
