I'm new at python and im trying to convert a the infos in a PDF file to EXCEL.
This is my code
import tabula
from tabula.io import read_pdf
import pandas as pd
from pandas import DataFrame
path = "C:/Users/Littl/OneDrive/Área de Trabalho/app/pdf/notafiscal.pdf"
PDF = tabula.read_pdf(path, pages='all', multiple_tables=True)
pdf_out_xlsv = "C:/Users/Littl/OneDrive/Área de Trabalho/app/pdf"
PDF = pd.DataFrame(PDF)
PDF.to_excel(pdf_out_xlsv, index=False)
print("Done")
and thats the error
Traceback (most recent call last):
File "c:\Users\Littl\OneDrive\Área de Trabalho\app\app.py", line 12, in
PDF = pd.DataFrame(PDF)
^^^^^^^^^^^^^^^^^
File "C:\Users\Littl\AppData\Local\Programs\Python\Python311\Lib\site-packages\pandas\core\frame.py", line 798, in init
mgr = ndarray_to_mgr(
^^^^^^^^^^^^^^^
File "C:\Users\Littl\AppData\Local\Programs\Python\Python311\Lib\site-packages\pandas\core\internals\construction.py", line 320, in ndarray_to_mgr
values = _prep_ndarraylike(values, copy=copy_on_sanitize)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "C:\Users\Littl\AppData\Local\Programs\Python\Python311\Lib\site-packages\pandas\core\internals\construction.py", line 553, in _prep_ndarraylike
values = np.array([convert(v) for v in values])
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
ValueError: setting an array element with a sequence. The requested array has an inhomogeneous shape after 1 dimensions. The detected shape was (7,) + inhomogeneous part.