0
Traceback (most recent call last):
  File "/Users/noelsjacob/Desktop/Projects/py_pdf_stm-master/TableExtractor.py", line 734, in <module>
    tables = pdf_interpreter.parse_page(1)
  File "/Users/noelsjacob/Desktop/Projects/py_pdf_stm-master/TableExtractor.py", line 641, in parse_page
    p_im = page.to_image(resolution=100)
  File "/Users/noelsjacob/anaconda3/lib/python3.10/site-packages/pdfplumber/page.py", line 431, in to_image
    return PageImage(self, resolution=resolution or DEFAULT_RESOLUTION)
  File "/Users/noelsjacob/anaconda3/lib/python3.10/site-packages/pdfplumber/display.py", line 95, in __init__
    self.original = get_page_image(
  File "/Users/noelsjacob/anaconda3/lib/python3.10/site-packages/pdfplumber/display.py", line 56, in get_page_image
    with WandImage(
  File "/Users/noelsjacob/anaconda3/lib/python3.10/site-packages/wand/image.py", line 9365, in __init__
    self.read(filename=filename)
  File "/Users/noelsjacob/anaconda3/lib/python3.10/site-packages/wand/image.py", line 10120, in read
    self.raise_exception()
  File "/Users/noelsjacob/anaconda3/lib/python3.10/site-packages/wand/resource.py", line 225, in raise_exception
    raise e
wand.exceptions.MissingDelegateError: no decode delegate for this image format `' @ error/constitute.c/ReadImage/746

I'm getting the error above while running the following Python code:

def parse_page(self, page_n):
    """Extract the tables found on one page of ``self.pdf``.

    For every table reported by ``TableFinder`` the cell rectangles are
    turned into ``Point``/``Line``/``Cell`` objects, the lines are filtered
    and assembled into a skeleton, and a ``Table`` is built from the result.

    When ``self.draw`` is true, debug PNGs are written to the current
    working directory (``page-<page>-lines.png``, ``page-<page>-<n>_im.png``,
    ``page-<page>-<n>.png`` and ``page-<page>-<n>-skeleton.png``, where
    ``<page>`` is ``page_n + 1``).  When ``self.debug`` is true, progress
    messages are printed.

    Args:
        page_n: Index into ``self.pdf.pages``.

    Returns:
        A list of built ``Table`` objects, one per table for which a
        skeleton could be constructed.  An empty list is returned when the
        page has more than five detected tables.
    """
    if self.debug:
        print('Parsing page', page_n)
    page = self.pdf.pages[page_n]
    if self.debug:
        print('Rendering page')

    if self.debug:
        print('Finding tables')
    tables = TableFinder(page, {'snap_tolerance': 3, 'join_tolerance': 3})
    if self.debug:
        print('Found', len(tables.tables), 'tables')
    beaut_tables = []
    if self.draw:
        # NOTE(review): page.to_image() renders the PDF through
        # Wand/ImageMagick. A MissingDelegateError raised here presumably
        # means ImageMagick has no PDF delegate (Ghostscript) available --
        # verify the local installation; it cannot be fixed from this code.
        p_im = page.to_image(resolution=100)
        p_im.draw_lines(page.lines)
        p_im.save('page-{}-lines.png'.format(page_n + 1))
        # Pillow requires an integer (width, height); pdfplumber reports
        # page dimensions as non-integer numbers, so convert once here.
        canvas_size = (int(page.width), int(page.height))
    if len(tables.tables) > 5:
        return []
    for n, table in enumerate(tables.tables):
        # Common prefix of every debug image written for this table.
        out_prefix = 'page-{}-{}'.format(page_n + 1, n)
        if self.draw:
            p_im.reset()
            im = Image.new('RGB', canvas_size, (255,) * 3)
            canvas = ImageDraw.ImageDraw(im)
        ugly_table = table.extract()
        lines = []  # type: List[Line]
        cells = []  # type: List[Cell]
        for cell in tqdm(table.cells, desc='Parsing cells', unit='cells'):
            x1, y1, x2, y2 = cell
            # Corner points of the cell rectangle; the direction flags
            # record towards which sides the cell's edges leave each corner.
            p1 = Point(x1, y1)
            p1.right = True
            p1.down = True
            p2 = Point(x2, y1)
            p2.left = True
            p2.down = True
            p3 = Point(x2, y2)
            p3.up = True
            p3.left = True
            p4 = Point(x1, y2)
            p4.up = True
            p4.right = True
            # The four edges, in the order p1->p2->p3->p4->p1.
            lines.extend([
                Line(p1, p2),
                Line(p2, p3),
                Line(p3, p4),
                Line(p4, p1),
            ])
            cells.append(Cell(p1, p2, p3, p4))

        lines = self.filter_lines(lines)
        if self.draw:
            # Saved before the skeleton is built so the images exist even
            # when the table is skipped by the `continue` below.
            p_im.save(out_prefix + '_im.png')
            im.save(out_prefix + '.png')
        skeleton_points, skeleton = self.build_skeleton(lines.copy())
        if not skeleton_points:
            continue
        skeleton = self.skeleton_to_2d_table(skeleton)

        beaut_table = Table(cells, skeleton, ugly_table, page.extract_words())
        beaut_table.build_table()
        if self.draw:
            for cell in beaut_table.cells:
                cell.draw(canvas)
        if self.debug:
            print('Saving rendered table')
        if self.draw:
            # Overwrite the early images now that the cells are drawn.
            p_im.save(out_prefix + '_im.png')
            im.save(out_prefix + '.png')
            canvas.rectangle((0, 0, page.width, page.height), fill='white')  # cleaning canvas
            # Label every skeleton cell with its "row-column" position.
            for row_id, row in enumerate(skeleton):
                for cell_id, cell in enumerate(row):
                    cell.text = '{}-{}'.format(row_id, cell_id)
                    cell.draw(canvas, color='green', text_color='red')
            im.save(out_prefix + '-skeleton.png')
        beaut_tables.append(beaut_table)

    return beaut_tables

I tried to fix this by installing the missing delegate library (libpng), and I have the latest version of Wand, but I'm still facing this issue. I also have a proper installation of ImageMagick and all the necessary libraries.

The libraries I'm using are: xlsxwriter, pdfplumber, pdfminer.six, pyparsing, Pillow, certifi==2018.8.13, chardet==3.0.4, idna==2.7, PyPDF3==0.0.4, requests==2.20.0, tqdm==4.25.0, urllib3==1.24.2

This code is used to convert the PDF into images and thus extract the merged columns and rows.

user16217248
  • 3,119
  • 19
  • 19
  • 37
  • It is very difficult to answer your question without seeing any of your data nor any of the solution you have written which produces your problem. Please edit your question to show a minimal reproducible set consisting of sample input, expected output, actual output, and only the relevant code necessary to reproduce the problem. See [Minimal Reproducible Example](https://stackoverflow.com/help/minimal-reproducible-example "Minimal Reproducible Example") for details on how to best help us help you. – itprorh66 Jun 13 '23 at 12:47

0 Answers0