Is there any code snippet that will work? I have tried this for converting pdf to html
from pdfminer.pdfinterp import PDFResourceManager
from pdfminer.pdfpage import PDFPage
from pdfminer.converter import HTMLConverter, TextConverter
from pdfminer.layout import LAParams
import os
import contextlib
import tempfile
rsrcmgr = PDFResourceManager()
laparams = LAParams()
converter = HTMLConverter if format == 'html' else TextConverter
out_file = "A:\folder"
in_file = "A:\folder\pyhtml.html"
pdf_filename = 'insurance.pdf'
device = converter(rsrcmgr, out_file, codec='utf-8', laparams=laparams)
PDFPage.get_pages(rsrcmgr, device, in_file, pagenos=[1], maxpages=1)
with contextlib.closing(tempfile.NamedTemporaryFile(mode='r', suffix='.xml')) as xmlin:
cmd = 'pdftohtml -xml -nodrm -zoom 1.5 -enc UTF-8 -noframes "%s" "%s"' % (
pdf_filename, xmlin.name.rpartition('.')[0])
os.system(cmd + " >/dev/null 2>&1")
result = xmlin.read().decode('utf-8')
when i run the above code it gives me following erroe
Traceback (most recent call last):
File "a:\folder\new - Copy.py", line 14, in <module>
device = converter(rsrcmgr, out_file, codec='utf-8', laparams=laparams)
AttributeError: 'str' object has no attribute 'write'