I have a script that combines a bunch of PDFs into a single file, using PyPDF2, all good but on the company network is really slow. I then tried PyMuPdf and it is 100 times faster, but bookmarks and metadata are not copied automatically. Is there an argument to pass or something to say "while you are copying, also don't forget the bookmarks and metadata buddy"?
A bit of code here:
def pdfMerge(try_again):
start = time.time()
result = fitz.open()
for pdf in sorted_list:
print(pdf)
with fitz.open(pdf) as file_temp:
result.insert_pdf(file_temp)
if try_again == 0:
formatted_name = f"{job_number}-Combined Set-{date}.pdf"
else:
formatted_name = f"{job_number}-Combined Set-{date2}.pdf"
result.save(formatted_name)
end = time.time()
print(end - start)
return formatted_name
I am also open to other options such as pikepdf (which seems better supported).
Thanks!
EDIT: I changed the code:
def pdfMerge(try_again):
start = time.time()
toc = []
result = fitz.open()
for pdf in sorted_list:
print(pdf)
with fitz.open(pdf) as file_temp:
bookmarks = file_temp.get_toc()
file_temp.set_toc(bookmarks)
result.insert_pdf(file_temp)
print(bookmarks)
bookmarks = ''
if try_again == 0:
formatted_name = f"{job_number}-RGB-Combined Set-{date}.pdf"
else:
formatted_name = f"{job_number}-RGB-Combined Set-{date2}.pdf"
result.save(formatted_name)
end = time.time()
print(end - start)
return formatted_name
The print(bookmarks)
shows exactly what I need, but the combined PDF is still empty. What am I doing wrong?
EDIT 2: Here is my new function:
def pdfMerge(try_again):
start = time.time()
toc = []
result = fitz.open()
bookmarks_list = []
for pdf in sorted_list:
with fitz.open(pdf) as file_temp:
bookmarks = file_temp.get_toc()
print(bookmarks)
bookmarks_list.append(bookmarks)
result.insert_pdf(file_temp)
if try_again == 0:
formatted_name = f"{job_number}-RGB-Combined Set-{date}.pdf"
else:
formatted_name = f"{job_number}-RGB-Combined Set-{date2}.pdf"
print(bookmarks_list)
result.set_toc(bookmarks_list)
result.save(formatted_name)
end = time.time()
print(end - start)
return formatted_name
Which gives me this error:
File "C:\Users\Sav...\Coding_Python\PdfMerge\RBGPdfMerge.0.11.10.py", line 112, in <module>
pdfMerge(try_again)
File "C:\Users\Sav...\Coding_Python\PdfMerge\RBGPdfMerge.0.11.10.py", line 88, in pdfMerge
result.set_toc(bookmarks_list)
File "C:\Users\Sav...\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.9_qbz5n2kfra8p0\LocalCache\local-packages\Python39\site-packages\fitz\utils.py", line 1325, in set_toc
raise ValueError("hierarchy level of item 0 must be 1")
ValueError: hierarchy level of item 0 must be 1
The same files are perfectly merged with pypdf
and pypdf2
.