Traceback (most recent call last):
File "/Users/noelsjacob/Desktop/Projects/py_pdf_stm-master/TableExtractor.py", line 734, in <module>
tables = pdf_interpreter.parse_page(1)
File "/Users/noelsjacob/Desktop/Projects/py_pdf_stm-master/TableExtractor.py", line 641, in parse_page
p_im = page.to_image(resolution=100)
File "/Users/noelsjacob/anaconda3/lib/python3.10/site-packages/pdfplumber/page.py", line 431, in to_image
return PageImage(self, resolution=resolution or DEFAULT_RESOLUTION)
File "/Users/noelsjacob/anaconda3/lib/python3.10/site-packages/pdfplumber/display.py", line 95, in __init__
self.original = get_page_image(
File "/Users/noelsjacob/anaconda3/lib/python3.10/site-packages/pdfplumber/display.py", line 56, in get_page_image
with WandImage(
File "/Users/noelsjacob/anaconda3/lib/python3.10/site-packages/wand/image.py", line 9365, in __init__
self.read(filename=filename)
File "/Users/noelsjacob/anaconda3/lib/python3.10/site-packages/wand/image.py", line 10120, in read
self.raise_exception()
File "/Users/noelsjacob/anaconda3/lib/python3.10/site-packages/wand/resource.py", line 225, in raise_exception
raise e
wand.exceptions.MissingDelegateError: no decode delegate for this image format `' @ error/constitute.c/ReadImage/746
I'm getting the error above while running the following Python code:
def parse_page(self, page_n):
    """Extract the tables found on one page of ``self.pdf``.

    Every table reported by ``TableFinder`` is rebuilt from its cell
    rectangles: each rectangle becomes four corner ``Point`` objects, four
    border ``Line`` objects and one ``Cell``. The lines are filtered, turned
    into a skeleton grid, and a ``Table`` is built from the result.

    When ``self.draw`` is true, debug PNGs are written to the current
    working directory: ``page-<p>-lines.png`` for the page, and
    ``page-<p>-<n>_im.png``, ``page-<p>-<n>.png`` and
    ``page-<p>-<n>-skeleton.png`` for table ``n``, where ``<p>`` is
    ``page_n + 1``.

    Args:
        page_n: Index into ``self.pdf.pages``.

    Returns:
        A list of built ``Table`` objects. The list is empty when more than
        five tables are detected on the page; tables for which
        ``build_skeleton`` yields no skeleton points are skipped.
    """
    if self.debug:
        print('Parsing page', page_n)
    page = self.pdf.pages[page_n]
    if self.debug:
        print('Rendering page')
        print('Finding tables')
    tables = TableFinder(page, {'snap_tolerance': 3, 'join_tolerance': 3})
    found_tables = tables.tables
    if self.debug:
        print('Found', len(found_tables), 'tables')

    page_label = page_n + 1
    # Pillow requires integer pixel dimensions, while the page size may be a
    # Decimal or float; truncate once and reuse the result for every canvas.
    canvas_size = (int(page.width), int(page.height))

    if self.draw:
        # NOTE(review): rendering goes through pdfplumber's image backend; a
        # Wand MissingDelegateError raised here presumably means ImageMagick
        # has no delegate able to decode PDF (commonly Ghostscript) -- confirm
        # against the local installation.
        p_im = page.to_image(resolution=100)
        p_im.draw_lines(page.lines)
        p_im.save('page-{}-lines.png'.format(page_label))

    # Pages with more than five detected tables are not processed at all.
    if len(found_tables) > 5:
        return []

    beaut_tables = []
    for n, table in enumerate(found_tables):
        file_prefix = 'page-{}-{}'.format(page_label, n)
        if self.draw:
            p_im.reset()
            im = Image.new('RGB', canvas_size, (255,) * 3)
            canvas = ImageDraw.ImageDraw(im)

        ugly_table = table.extract()
        lines = []  # type: List[Line]
        cells = []  # type: List[Cell]
        for bbox in tqdm(table.cells, desc='Parsing cells', unit='cells'):
            x1, y1, x2, y2 = bbox
            # Corners in the order (x1, y1), (x2, y1), (x2, y2), (x1, y2);
            # the flags mark the directions in which the cell border leaves
            # each corner.
            p1 = Point(x1, y1)
            p1.right = True
            p1.down = True
            p2 = Point(x2, y1)
            p2.left = True
            p2.down = True
            p3 = Point(x2, y2)
            p3.up = True
            p3.left = True
            p4 = Point(x1, y2)
            p4.up = True
            p4.right = True
            lines.extend((
                Line(p1, p2),
                Line(p2, p3),
                Line(p3, p4),
                Line(p4, p1),
            ))
            cells.append(Cell(p1, p2, p3, p4))

        lines = self.filter_lines(lines)
        if self.draw:
            # Saved before the skeleton check so that a skipped table still
            # leaves its (blank) debug images behind, as before.
            p_im.save(file_prefix + '_im.png')
            im.save(file_prefix + '.png')

        skeleton_points, skeleton = self.build_skeleton(lines.copy())
        if not skeleton_points:
            continue
        skeleton = self.skeleton_to_2d_table(skeleton)

        beaut_table = Table(cells, skeleton, ugly_table, page.extract_words())
        beaut_table.build_table()
        if self.draw:
            for built_cell in beaut_table.cells:
                built_cell.draw(canvas)
        if self.debug:
            print('Saving rendered table')
        if self.draw:
            p_im.save(file_prefix + '_im.png')
            im.save(file_prefix + '.png')
            # Wipe the canvas, then draw the skeleton grid with each cell
            # labelled "<row>-<column>".
            canvas.rectangle((0, 0) + canvas_size, fill='white')
            for row_id, row in enumerate(skeleton):
                for cell_id, skeleton_cell in enumerate(row):
                    skeleton_cell.text = '{}-{}'.format(row_id, cell_id)
                    skeleton_cell.draw(canvas, color='green', text_color='red')
            im.save(file_prefix + '-skeleton.png')
        beaut_tables.append(beaut_table)
    return beaut_tables
I tried to fix this by installing the delegate library (libpng), and I have the latest version of Wand, but I am still facing this issue. I also have a working installation of ImageMagick and all the necessary libraries.
The libraries I'm using are:
xlsxwriter,
pdfplumber,
pdfminer.six,
pyparsing,
Pillow,
certifi==2018.8.13,
chardet==3.0.4,
idna==2.7,
PyPDF3==0.0.4,
requests==2.20.0,
tqdm==4.25.0,
urllib3==1.24.2
This code is used to convert the PDF into images and thus extract the merged columns and rows.